quantvn-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of quantvn might be problematic.

@@ -0,0 +1,1281 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ stocks.py: Fetch OHLCV from the Entrade API with pivot/fill logic.
5
+
6
+ Public:
7
+ - list_liquid_asset()
8
+ - get_hist(asset_name, resolution="m") # resolution: "m" | "h" | "1H" | "1D"
9
+
10
+ Features:
11
+ - Fetches all data in a single request to the Entrade API
12
+ - Pivots and forward fills to handle missing data
13
+ - Converts timestamps to UTC+7 (Vietnam time)
14
+ - Output: DataFrame ["Date","time","Open","High","Low","Close","volume"]
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import datetime as dt
20
+ import io
21
+ import itertools
22
+ import json
23
+ from typing import Any, Dict, List, Optional, Union
24
+
25
+ import pandas as pd
26
+ import requests
27
+
28
+ from .const import (
29
+ CHART_URL,
30
+ GRAPHQL_URL,
31
+ INTERVAL_MAP,
32
+ INTRADAY_MAP,
33
+ INTRADAY_URL,
34
+ OHLC_COLUMNS,
35
+ OHLC_RENAME,
36
+ PRICE_DEPTH_URL,
37
+ PRICE_INFO_MAP,
38
+ TRADING_URL,
39
+ )
40
+ from .core import send_request
41
+ from .utils import Config
42
+
43
+ __all__ = ["list_liquid_asset", "get_hist"]
44
+
45
+ # ===== Entrade API source configuration =====
46
+ _STOCKS_API_BASE = "https://services.entrade.com.vn/chart-api/v2/ohlcs/stock"
47
+ _TIMEOUT = 30  # seconds
48
+ _MAX_REQUESTS = 2000  # safety cap on paginated requests (not used by the single-request fetch)
49
+
50
+ # Kept for the legacy list_liquid_asset API (in case it is used elsewhere)
51
+ LAMBDA_URL = Config.get_link()
52
+
53
+
54
+ def list_liquid_asset() -> pd.DataFrame:
55
+ """Retrieve a list of highly liquid assets (qua Lambda cũ)."""
56
+ api_key = Config.get_api_key()
57
+ r = requests.get(
58
+ f"{LAMBDA_URL}/list-liquid-asset",
59
+ headers={"x-api-key": api_key},
60
+ timeout=_TIMEOUT,
61
+ )
62
+ r.raise_for_status()
63
+ return pd.DataFrame(r.json())
64
+
65
+
66
+ # ===================== Helpers: parsing & normalization =====================
67
+
68
+
69
+ def _json_relaxed(text: str) -> Optional[Union[dict, list]]:
70
+ """
71
+ Try a fault-tolerant JSON parse in 3 steps:
72
+ 1) json.loads on the whole text
73
+ 2) Slice the substring between the first/last JSON delimiters (drops log noise), then loads
74
+ 3) NDJSON: one JSON document per line
75
+ Returns a dict/list if parsing succeeds, otherwise None.
76
+ """
77
+ try:
78
+ return json.loads(text)
79
+ except json.JSONDecodeError:
80
+ pass
81
+
82
+ starts = [i for i in [text.find("{"), text.find("[")] if i != -1]
83
+ ends = [i for i in [text.rfind("}"), text.rfind("]")] if i != -1]
84
+ if starts and ends and max(ends) > min(starts):
85
+ s, e = min(starts), max(ends) + 1
86
+ try:
87
+ return json.loads(text[s:e])
88
+ except json.JSONDecodeError:
89
+ pass
90
+
91
+ items = []
92
+ for line in text.splitlines():
93
+ s = line.strip()
94
+ if not s:
95
+ continue
96
+ try:
97
+ items.append(json.loads(s))
98
+ except json.JSONDecodeError:
99
+ continue
100
+ if items:
101
+ if isinstance(items[0], list):
102
+ out = []
103
+ for it in items:
104
+ if isinstance(it, list):
105
+ out.extend(it)
106
+ return out
107
+ return items
108
+
109
+ return None
110
+
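As a rough illustration of the three fallback steps (assuming the module is importable as quantvn.stocks; the exact package path is not confirmed by this diff):

from quantvn.stocks import _json_relaxed  # hypothetical import path

# Step 1: the whole string is valid JSON.
_json_relaxed('{"t": [1], "c": [10.0]}')       # -> {'t': [1], 'c': [10.0]}
# Step 2: log noise around a single JSON object is sliced away.
_json_relaxed('INFO fetched {"t": [1]} done')  # -> {'t': [1]}
# Step 3: NDJSON, one JSON document per line.
_json_relaxed('{"a": 1}\n{"a": 2}')            # -> [{'a': 1}, {'a': 2}]
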
111
+
112
+ def _scan_all_json_blocks(text: str) -> List[Any]:
113
+ """
114
+ Quét *tất cả* khối JSON (object/array) nối tiếp trong text (fix lỗi 'Extra data').
115
+ Trả về danh sách các object đã parse (dict/list). Bỏ qua block lỗi.
116
+ """
117
+ s = text.lstrip()
118
+ i, n = 0, len(s)
119
+ blocks: List[Any] = []
120
+
121
+ while i < n:
122
+ while i < n and s[i] not in "{[":
123
+ i += 1
124
+ if i >= n:
125
+ break
126
+
127
+ opening = s[i]
128
+ closing = "}" if opening == "{" else "]"
129
+ depth = 0
130
+ in_str = False
131
+ esc = False
132
+ j = i
133
+
134
+ while j < n:
135
+ ch = s[j]
136
+ if in_str:
137
+ if esc:
138
+ esc = False
139
+ elif ch == "\\":
140
+ esc = True
141
+ elif ch == '"':
142
+ in_str = False
143
+ else:
144
+ if ch == '"':
145
+ in_str = True
146
+ elif ch == opening:
147
+ depth += 1
148
+ elif ch == closing:
149
+ depth -= 1
150
+ if depth == 0:
151
+ j += 1
152
+ break
153
+ j += 1
154
+
155
+ if depth != 0:
156
+ break
157
+
158
+ block = s[i:j]
159
+ try:
160
+ obj = json.loads(block)
161
+ blocks.append(obj)
162
+ except Exception:
163
+ pass
164
+ i = j
165
+
166
+ return blocks
167
+
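For instance, two OHLCV objects concatenated back to back (the response shape here is an illustrative assumption) come out as separate blocks:

from quantvn.stocks import _scan_all_json_blocks  # hypothetical import path

raw = '{"t": [1], "c": [10.0]}{"t": [2], "c": [10.5]}'
_scan_all_json_blocks(raw)
# -> [{'t': [1], 'c': [10.0]}, {'t': [2], 'c': [10.5]}]
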
168
+
169
+ def _merge_ohlcv_dict_blocks(blocks: List[dict]) -> dict:
170
+ """Gộp nhiều dict kiểu {t,o,h,l,c,(v)} thành một dict duy nhất (append theo chiều dọc)."""
171
+ keys = set().union(*[set(b.keys()) for b in blocks])
172
+ merged: Dict[str, List[Any]] = {}
173
+ for k in keys:
174
+ buf: List[Any] = []
175
+ for b in blocks:
176
+ v = b.get(k, None)
177
+ if isinstance(v, list):
178
+ buf.extend(v)
179
+ else:
180
+ if k == "v":
181
+ buf.extend([None] * len(b.get("t", [])))
182
+ if buf:
183
+ merged[k] = buf
184
+ return merged
185
+
186
+
187
+ def _as_dataframe(parsed: Any, raw_text: str) -> pd.DataFrame:
188
+ """
189
+ Coerce any of the common structures into a DataFrame:
190
+ - dict-of-arrays {t,o,h,l,c,(v)}
191
+ - list-of-dicts
192
+ - list-of-lists (let pandas infer)
193
+ - CSV fallback
194
+ """
195
+ if parsed is not None:
196
+ if (
197
+ isinstance(parsed, list)
198
+ and parsed
199
+ and all(
200
+ isinstance(b, dict) and {"t", "o", "h", "l", "c"}.issubset(b.keys())
201
+ for b in parsed
202
+ )
203
+ ):
204
+ parsed = _merge_ohlcv_dict_blocks(parsed)
205
+
206
+ if isinstance(parsed, dict):
207
+ if "data" in parsed:
208
+ parsed = parsed["data"]
209
+ if isinstance(parsed, dict) and {"t", "o", "h", "l", "c"}.issubset(
210
+ parsed.keys()
211
+ ):
212
+ n = len(parsed["t"])
213
+ vol = parsed.get("v", [None] * n)
214
+ return pd.DataFrame(
215
+ {
216
+ "t": parsed["t"],
217
+ "o": parsed["o"],
218
+ "h": parsed["h"],
219
+ "l": parsed["l"],
220
+ "c": parsed["c"],
221
+ "v": vol,
222
+ }
223
+ )
224
+ try:
225
+ return pd.DataFrame(parsed)
226
+ except Exception:
227
+ return pd.DataFrame([parsed])
228
+
229
+ if isinstance(parsed, list):
230
+ if not parsed:
231
+ return pd.DataFrame()
232
+ if isinstance(parsed[0], dict):
233
+ return pd.DataFrame(parsed)
234
+ return pd.DataFrame(parsed)
235
+
236
+ try:
237
+ return pd.read_csv(io.StringIO(raw_text))
238
+ except Exception:
239
+ return pd.DataFrame()
240
+
241
+
242
+ def _flatten_if_cell_is_list(df: pd.DataFrame) -> pd.DataFrame:
243
+ """Nếu mỗi ô chứa list (ví dụ cột 't' là list epoch), flatten thành từng dòng."""
244
+ if df.empty:
245
+ return df
246
+ cols = set(df.columns)
247
+ tcol = _pick(cols, "t", "time", "timestamp", "ts", "date", "dt")
248
+ if tcol is None:
249
+ return df
250
+
251
+ first = df.iloc[0][tcol]
252
+ if not isinstance(first, (list, tuple)):
253
+ return df  # already flat
254
+
255
+ def chain(series):
256
+ seqs = [x for x in series.dropna().tolist() if isinstance(x, (list, tuple))]
257
+ return list(itertools.chain.from_iterable(seqs)) if seqs else []
258
+
259
+ t = chain(df[tcol])
260
+ n = len(t)
261
+
262
+ def vals(name_candidates):
263
+ c = _pick(set(df.columns), *name_candidates)
264
+ if c is None:
265
+ return [None] * n
266
+ v = chain(df[c])
267
+ return (v[:n] + [None] * max(0, n - len(v))) if v else [None] * n
268
+
269
+ out = pd.DataFrame(
270
+ {
271
+ "t": t,
272
+ "o": vals(("o", "open", "Open")),
273
+ "h": vals(("h", "high", "High")),
274
+ "l": vals(("l", "low", "Low")),
275
+ "c": vals(("c", "close", "Close")),
276
+ "v": vals(("v", "vol", "volume", "Volume")),
277
+ }
278
+ )
279
+ return out
280
+
281
+
282
+ def _pick(cols, *cands):
283
+ for c in cands:
284
+ if c in cols:
285
+ return c
286
+ return None
287
+
288
+
289
+ def _normalize_ohlcv_df(df: pd.DataFrame) -> pd.DataFrame:
290
+ """
291
+ Normalize to columns: Date (datetime), Open, High, Low, Close, Volume
292
+ - Auto-detects epoch seconds vs. milliseconds
293
+ - Does NOT convert timezones
294
+ """
295
+ if df.empty:
296
+ return pd.DataFrame(columns=["Date", "Open", "High", "Low", "Close", "Volume"])
297
+
298
+ ren = {}
299
+ if "t" in df.columns:
300
+ ren["t"] = "Date"
301
+ if "time" in df.columns:
302
+ ren["time"] = "Date"
303
+ for a, b in [
304
+ ("o", "Open"),
305
+ ("open", "Open"),
306
+ ("h", "High"),
307
+ ("high", "High"),
308
+ ("l", "Low"),
309
+ ("low", "Low"),
310
+ ("c", "Close"),
311
+ ("close", "Close"),
312
+ ("v", "Volume"),
313
+ ("vol", "Volume"),
314
+ ("Volume", "Volume"),
315
+ ]:
316
+ if a in df.columns:
317
+ ren[a] = b
318
+ df = df.rename(columns=ren)
319
+
320
+ for col in ["Date", "Open", "High", "Low", "Close", "Volume"]:
321
+ if col not in df.columns:
322
+ df[col] = pd.NA
323
+
324
+ s = df["Date"]
325
+ if pd.api.types.is_numeric_dtype(s):
326
+ unit = "ms" if (s.dropna().astype("int64") > 1_000_000_000_000).any() else "s"
327
+ # Result is timezone-naive (no UTC conversion)
328
+ df["Date"] = pd.to_datetime(s, unit=unit)
329
+ else:
330
+ df["Date"] = pd.to_datetime(s, errors="coerce")
331
+
332
+ for col in ["Open", "High", "Low", "Close", "Volume"]:
333
+ df[col] = pd.to_numeric(df[col], errors="coerce")
334
+
335
+ df = df.dropna(subset=["Date"]).sort_values("Date").reset_index(drop=True)
336
+ return df[["Date", "Open", "High", "Low", "Close", "Volume"]]
337
+
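A minimal sketch of the epoch handling, assuming the module can be imported as quantvn.stocks: second-resolution timestamps (below 10**12) are parsed with unit="s", larger values as milliseconds.

import pandas as pd
from quantvn.stocks import _normalize_ohlcv_df  # hypothetical import path

raw = pd.DataFrame({
    "t": [1_700_000_000, 1_700_000_060],   # epoch seconds -> unit="s"
    "o": [25.0, 25.1], "h": [25.2, 25.3],
    "l": [24.9, 25.0], "c": [25.1, 25.2], "v": [1000, 1200],
})
norm = _normalize_ohlcv_df(raw)
print(norm.columns.tolist())
# ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
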
338
+
339
+ def _format_date_time_output(df: pd.DataFrame) -> pd.DataFrame:
340
+ """
341
+ Input: df with columns Date (datetime), Open/High/Low/Close/Volume
342
+ Output: Date (YYYY-MM-DD), time (HH:MM:SS), Open.., volume (lowercase)
343
+ Does NOT convert timezones; only formats the existing datetimes.
344
+ """
345
+ if df.empty:
346
+ return pd.DataFrame(
347
+ columns=["Date", "time", "Open", "High", "Low", "Close", "volume"]
348
+ )
349
+
350
+ out = df.copy()
351
+ out["Date"] = pd.to_datetime(out["Date"]).dt.strftime("%Y-%m-%d")
352
+ out["time"] = pd.to_datetime(
353
+ out["Date"] + " " + pd.to_datetime(df["Date"]).dt.strftime("%H:%M:%S")
354
+ ).dt.strftime("%H:%M:%S")
355
+ # The expression above keeps "time" from the original time-of-day; no TZ conversion.
356
+
357
+ if "Volume" in out.columns:
358
+ out = out.rename(columns={"Volume": "volume"})
359
+
360
+ for col in ["Open", "High", "Low", "Close", "volume"]:
361
+ if col in out.columns:
362
+ out[col] = pd.to_numeric(out[col], errors="coerce")
363
+
364
+ out = out[["Date", "time", "Open", "High", "Low", "Close", "volume"]]
365
+ out = out.sort_values(["Date", "time"], kind="mergesort").reset_index(drop=True)
366
+ return out
367
+
368
+
369
+ def _extract_last_epoch(df_seg: pd.DataFrame) -> Optional[int]:
370
+ """Lấy epoch giây cuối cùng của đoạn df_seg (sau normalize)."""
371
+ if df_seg.empty:
372
+ return None
373
+ ns = pd.to_datetime(df_seg["Date"]).astype("int64").max()
374
+ return int(ns // 1_000_000_000)
375
+
376
+
377
+ # ===================== Fetching the OHLCV time series =====================
378
+
379
+
380
+ def _fetch_entrade_data(symbol: str, resolution: str) -> pd.DataFrame:
381
+ """
382
+ Fetch the full OHLCV history from the Entrade API (single request).
383
+ Returns a raw DataFrame with columns: t, o, h, l, c, v
384
+ """
385
+ url = (
386
+ f"{_STOCKS_API_BASE}?from=0"
387
+ f"&resolution={resolution}"
388
+ f"&symbol={symbol}"
389
+ f"&to=9999999999"
390
+ )
391
+
392
+ try:
393
+ response = requests.get(url, timeout=_TIMEOUT)
394
+ response.raise_for_status()
395
+ data = response.json()
396
+
397
+ # Entrade returns either a list of dicts or a dict of arrays
398
+ if isinstance(data, list):
399
+ df = pd.DataFrame(data)
400
+ elif isinstance(data, dict):
401
+ df = pd.DataFrame(data)
402
+ else:
403
+ return pd.DataFrame()
404
+
405
+ # Keep the first 6 columns: t, o, h, l, c, v
406
+ if len(df.columns) >= 6:
407
+ df = df.iloc[:, :6]
408
+ df.columns = ["t", "o", "h", "l", "c", "v"]
409
+
410
+ return df
411
+
412
+ except Exception as e:
413
+ raise ValueError(f"Failed to fetch data from Entrade: {e}")
414
+
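A usage sketch (network access to services.entrade.com.vn assumed; the columns depend on what the API actually returns):

from quantvn.stocks import _fetch_entrade_data  # hypothetical import path

# Single GET over the full epoch range (from=0, to=9999999999).
raw = _fetch_entrade_data("HPG", "1D")
print(raw.columns.tolist())   # expected: ['t', 'o', 'h', 'l', 'c', 'v']
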
415
+
416
+ # ===================== Reorganized: Company & Finance =====================
417
+
418
+
419
+ class Company:
420
+ """Company information via TCBS tcanalysis endpoints."""
421
+
422
+ def __init__(self, symbol):
423
+ self.symbol = symbol
424
+
425
+ def overview(self):
426
+ BASE = "https://apipubaws.tcbs.com.vn"
427
+ ANALYSIS = "tcanalysis"
428
+ url = f"{BASE}/{ANALYSIS}/v1/ticker/{self.symbol}/overview"
429
+ data = send_request(url)
430
+ return pd.DataFrame(data, index=[0])
431
+
432
+ def profile(self):
433
+ BASE = "https://apipubaws.tcbs.com.vn"
434
+ ANALYSIS = "tcanalysis"
435
+ url = f"{BASE}/{ANALYSIS}/v1/company/{self.symbol}/overview"
436
+ data = send_request(url)
437
+ return pd.json_normalize(data)
438
+
439
+ def shareholders(self, page_size=50, page=0):
440
+ BASE = "https://apipubaws.tcbs.com.vn"
441
+ ANALYSIS = "tcanalysis"
442
+ url = f"{BASE}/{ANALYSIS}/v1/company/{self.symbol}/large-share-holders"
443
+ data = send_request(url, params={"page": page, "size": page_size})
444
+ items = (data or {}).get("listShareHolder", [])
445
+ return pd.json_normalize(items)
446
+
447
+ def officers(self, page_size=50, page=0):
448
+ BASE = "https://apipubaws.tcbs.com.vn"
449
+ ANALYSIS = "tcanalysis"
450
+ url = f"{BASE}/{ANALYSIS}/v1/company/{self.symbol}/key-officers"
451
+ data = send_request(url, params={"page": page, "size": page_size})
452
+ items = (data or {}).get("listKeyOfficer", [])
453
+ return pd.json_normalize(items)
454
+
455
+ def subsidiaries(self, page_size=100, page=0):
456
+ BASE = "https://apipubaws.tcbs.com.vn"
457
+ ANALYSIS = "tcanalysis"
458
+ url = f"{BASE}/{ANALYSIS}/v1/company/{self.symbol}/sub-companies"
459
+ data = send_request(url, params={"page": page, "size": page_size})
460
+ items = (data or {}).get("listSubCompany", [])
461
+ return pd.json_normalize(items)
462
+
463
+ def events(self, page_size=15, page=0):
464
+ BASE = "https://apipubaws.tcbs.com.vn"
465
+ ANALYSIS = "tcanalysis"
466
+ url = f"{BASE}/{ANALYSIS}/v1/ticker/{self.symbol}/events-news"
467
+ data = send_request(url, params={"page": page, "size": page_size})
468
+ items = (data or {}).get("listEventNews", [])
469
+ return pd.DataFrame(items)
470
+
471
+ def news(self, page_size=15, page=0):
472
+ BASE = "https://apipubaws.tcbs.com.vn"
473
+ ANALYSIS = "tcanalysis"
474
+ url = f"{BASE}/{ANALYSIS}/v1/ticker/{self.symbol}/activity-news"
475
+ data = send_request(url, params={"page": page, "size": page_size})
476
+ items = (data or {}).get("listActivityNews", [])
477
+ return pd.DataFrame(items)
478
+
479
+ def ratio_summary(self):
480
+ BASE = "https://apipubaws.tcbs.com.vn"
481
+ ANALYSIS = "tcanalysis"
482
+ url = f"{BASE}/{ANALYSIS}/v1/ticker/{self.symbol}/ratios"
483
+ try:
484
+ data = send_request(url)
485
+ return (
486
+ pd.DataFrame(data, index=[0])
487
+ if isinstance(data, dict)
488
+ else pd.DataFrame(data)
489
+ )
490
+ except Exception:
491
+ url2 = f"{BASE}/{ANALYSIS}/v1/finance/{self.symbol}/financialratio"
492
+ data = send_request(url2)
493
+ return pd.DataFrame(data)
494
+
495
+
496
+ FIN_MAP = {
497
+ "income_statement": "incomestatement",
498
+ "balance_sheet": "balancesheet",
499
+ "cash_flow": "cashflow",
500
+ }
501
+ PERIOD_MAP = {"year": 1, "quarter": 0}
502
+
503
+
504
+ class Finance:
505
+ """Financial statements via TCBS."""
506
+
507
+ def __init__(self, symbol):
508
+ self.symbol = symbol
509
+
510
+ def _fetch(self, report, period="year", lang="vi", dropna=False):
511
+ BASE = "https://apipubaws.tcbs.com.vn"
512
+ ANALYSIS = "tcanalysis"
513
+ assert report in FIN_MAP, f"Invalid report: {report}"
514
+ url = f"{BASE}/{ANALYSIS}/v1/finance/{self.symbol}/{FIN_MAP[report]}"
515
+ params = {"period": PERIOD_MAP.get(period, 1), "size": 1000}
516
+ data = send_request(url, params=params)
517
+ df = pd.DataFrame(data)
518
+ if dropna:
519
+ df = df.dropna(axis=1, how="all")
520
+ return df
521
+
522
+ def income_statement(self, period="year", lang="vi", dropna=False):
523
+ return self._fetch("income_statement", period, lang, dropna)
524
+
525
+ def balance_sheet(self, period="year", lang="vi", dropna=False):
526
+ return self._fetch("balance_sheet", period, lang, dropna)
527
+
528
+ def cash_flow(self, period="year", lang="vi", dropna=False):
529
+ return self._fetch("cash_flow", period, lang, dropna)
530
+
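A minimal usage sketch for the TCBS wrappers above (live calls to apipubaws.tcbs.com.vn assumed; import path is hypothetical):

from quantvn.stocks import Company, Finance  # hypothetical import path

company = Company("HPG")
overview = company.overview()                  # one-row DataFrame
holders = company.shareholders(page_size=20)   # major shareholders

finance = Finance("HPG")
income_y = finance.income_statement(period="year")
balance_q = finance.balance_sheet(period="quarter", dropna=True)
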
531
+
532
+ # ===================== Reorganized: Fund =====================
533
+
534
+
535
+ class Fund:
536
+ """Mutual funds via Fmarket."""
537
+
538
+ def __init__(self):
539
+ pass
540
+
541
+ def listing(self, fund_type: str = "") -> pd.DataFrame:
542
+ BASE = "https://api.fmarket.vn/res/products"
543
+ url = f"{BASE}/filter"
544
+ payload = {
545
+ "types": ["NEW_FUND", "TRADING_FUND"],
546
+ "issuerIds": [],
547
+ "sortOrder": "DESC",
548
+ "sortField": "navTo6Months",
549
+ "page": 1,
550
+ "pageSize": 500,
551
+ "isIpo": False,
552
+ "fundAssetTypes": [] if not fund_type else [fund_type],
553
+ "bondRemainPeriods": [],
554
+ "searchField": "",
555
+ "isBuyByReward": False,
556
+ "thirdAppIds": [],
557
+ }
558
+ try:
559
+ data = send_request(url, method="POST", payload=payload)
560
+ rows = (data or {}).get("data", {}).get("rows", [])
561
+ df = pd.json_normalize(rows)
562
+ return df
563
+ except Exception:
564
+ data = send_request(f"{BASE}/public", params={"page": 1, "size": 500})
565
+ df = pd.json_normalize((data or {}).get("data", []))
566
+ if fund_type and "dataFundAssetType.name" in df.columns:
567
+ df = df[df["dataFundAssetType.name"].eq(fund_type)]
568
+ return df
569
+
570
+ def filter(self, q: str) -> pd.DataFrame:
571
+ df = self.listing()
572
+ if df.empty:
573
+ return df
574
+ mask = False
575
+ for col in [c for c in ["name", "shortName"] if c in df.columns]:
576
+ mask = mask | df[col].astype(str).str.contains(q, case=False, na=False)
577
+ return df[mask]
578
+
579
+ @staticmethod
580
+ def _resolve_candidates(code_or_id: str) -> list[str]:
581
+ cands = []
582
+ key = str(code_or_id).strip()
583
+ if key:
584
+ cands.append(key)
585
+ try:
586
+ _df = Fund().listing()
587
+ if not _df.empty:
588
+ cols = _df.columns
589
+
590
+ def _add(val):
591
+ if val is None:
592
+ return
593
+ s = str(val).strip()
594
+ if s and s not in cands:
595
+ cands.append(s)
596
+
597
+ if "code" in cols and _df["code"].notna().any():
598
+ m = _df["code"].astype(str).str.upper().eq(key.upper())
599
+ if m.any():
600
+ r = _df[m].iloc[0]
601
+ for k in ["code", "id", "vsdFeeId"]:
602
+ if k in cols:
603
+ _add(r.get(k))
604
+ if "id" in cols and _df["id"].notna().any():
605
+ m = _df["id"].astype(str).eq(key)
606
+ if m.any():
607
+ r = _df[m].iloc[0]
608
+ for k in ["code", "id", "vsdFeeId"]:
609
+ if k in cols:
610
+ _add(r.get(k))
611
+ if "vsdFeeId" in cols and _df["vsdFeeId"].notna().any():
612
+ m = _df["vsdFeeId"].astype(str).eq(key)
613
+ if m.any():
614
+ r = _df[m].iloc[0]
615
+ for k in ["code", "id", "vsdFeeId"]:
616
+ if k in cols:
617
+ _add(r.get(k))
618
+ except Exception:
619
+ pass
620
+ return cands
621
+
622
+ @staticmethod
623
+ def _try_paths(paths: list[str]) -> pd.DataFrame:
624
+ for url in paths:
625
+ try:
626
+ data = send_request(url)
627
+ if isinstance(data, list):
628
+ return pd.DataFrame(data)
629
+ return pd.json_normalize(data)
630
+ except Exception:
631
+ continue
632
+ return pd.DataFrame()
633
+
634
+ class details:
635
+ @staticmethod
636
+ def nav_report(code_or_id: str) -> pd.DataFrame:
637
+ BASE = "https://api.fmarket.vn/res/products"
638
+ cands = Fund._resolve_candidates(code_or_id)
639
+ paths = [f"{BASE}/public/{c}/nav-report" for c in cands] + [
640
+ f"{BASE}/{c}/nav-report" for c in cands
641
+ ]
642
+ return Fund._try_paths(paths)
643
+
644
+ @staticmethod
645
+ def top_holding(code_or_id: str) -> pd.DataFrame:
646
+ BASE = "https://api.fmarket.vn/res/products"
647
+ cands = Fund._resolve_candidates(code_or_id)
648
+ paths = [f"{BASE}/public/{c}/top-holding" for c in cands] + [
649
+ f"{BASE}/{c}/top-holding" for c in cands
650
+ ]
651
+ return Fund._try_paths(paths)
652
+
653
+ @staticmethod
654
+ def industry_holding(code_or_id: str) -> pd.DataFrame:
655
+ BASE = "https://api.fmarket.vn/res/products"
656
+ cands = Fund._resolve_candidates(code_or_id)
657
+ paths = [f"{BASE}/public/{c}/industry-holding" for c in cands] + [
658
+ f"{BASE}/{c}/industry-holding" for c in cands
659
+ ]
660
+ return Fund._try_paths(paths)
661
+
662
+ @staticmethod
663
+ def asset_holding(code_or_id: str) -> pd.DataFrame:
664
+ BASE = "https://api.fmarket.vn/res/products"
665
+ cands = Fund._resolve_candidates(code_or_id)
666
+ paths = [f"{BASE}/public/{c}/asset-holding" for c in cands] + [
667
+ f"{BASE}/{c}/asset-holding" for c in cands
668
+ ]
669
+ return Fund._try_paths(paths)
670
+
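A usage sketch for the Fmarket wrapper; the "STOCK" asset-type value and the "DCDS" code are illustrative, not confirmed by this file:

from quantvn.stocks import Fund  # hypothetical import path

fund = Fund()
all_funds = fund.listing()              # every NEW_FUND / TRADING_FUND row
equity_funds = fund.listing("STOCK")    # illustrative fundAssetTypes filter
matches = fund.filter("DC")             # substring match on name / shortName
nav = Fund.details.nav_report("DCDS")   # resolved via code, id or vsdFeeId
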
671
+
672
+ # ===================== Reorganized: Listing =====================
673
+
674
+
675
+ class Listing:
676
+ def __init__(self, source="VCI"):
677
+ self.source = source
678
+
679
+ def all_symbols(self):
680
+ # Try to use liquid asset list if available
681
+ try:
682
+ df = list_liquid_asset()
683
+ if not df.empty:
684
+ cols = set(df.columns)
685
+ sym_col = (
686
+ "symbol"
687
+ if "symbol" in cols
688
+ else ("ticker" if "ticker" in cols else None)
689
+ )
690
+ ex_col = "exchange" if "exchange" in cols else None
691
+ if sym_col:
692
+ out = pd.DataFrame(
693
+ {
694
+ "symbol": df[sym_col].astype(str),
695
+ "short_name": df.get(
696
+ "short_name", pd.Series([None] * len(df))
697
+ ),
698
+ "exchange": (
699
+ df[ex_col]
700
+ if ex_col in df.columns
701
+ else pd.Series([None] * len(df))
702
+ ),
703
+ }
704
+ )
705
+ return out.dropna(subset=["symbol"]).reset_index(drop=True)
706
+ except Exception:
707
+ pass
708
+ # Fallback minimal known set
709
+ return pd.DataFrame(
710
+ [
711
+ {"symbol": "HPG", "short_name": "HoaPhat", "exchange": "HOSE"},
712
+ {"symbol": "VIC", "short_name": "Vingroup", "exchange": "HOSE"},
713
+ {"symbol": "VNM", "short_name": "Vinamilk", "exchange": "HOSE"},
714
+ ]
715
+ )
716
+
717
+ def symbols_by_exchange(self):
718
+ df = self.all_symbols()
719
+ if not df.empty and "exchange" in df.columns:
720
+ out: dict[str, list[str]] = {"HOSE": [], "HNX": [], "UPCOM": []}
721
+ for ex, g in df.groupby(df["exchange"].fillna("HOSE")):
722
+ if ex in out:
723
+ out[ex] = g["symbol"].astype(str).dropna().unique().tolist()
724
+ return out
725
+ return {"HOSE": ["HPG", "VIC", "VNM"], "HNX": [], "UPCOM": []}
726
+
727
+ def symbols_by_group(self, group="VN30"):
728
+ return []
729
+
730
+ def symbols_by_industries(self):
731
+ return pd.DataFrame(columns=["symbol", "icb_industry"])
732
+
733
+ def industries_icb(self):
734
+ return pd.DataFrame(columns=["icb_code", "icb_name"])
735
+
736
+
737
+ # ===================== Reorganized: Market Quote (VCI) =====================
738
+
739
+
740
+ class Quote:
741
+ """Market data via VCI: OHLCV history, intraday tick, price depth."""
742
+
743
+ def __init__(self, symbol, source="VCI"):
744
+ self.symbol = symbol
745
+ self.source = source
746
+
747
+ def _estimate_countback(self, start_dt, end_dt, interval):
748
+ if interval in ["1D", "1W", "1M"]:
749
+ if interval == "1D":
750
+ return max(1, (end_dt.date() - start_dt.date()).days + 1)
751
+ if interval == "1W":
752
+ return max(1, ((end_dt.date() - start_dt.date()).days // 7) + 1)
753
+ return max(
754
+ 1,
755
+ (end_dt.year - start_dt.year) * 12
756
+ + (end_dt.month - start_dt.month)
757
+ + 1,
758
+ )
759
+ if interval == "1H":
760
+ return max(1, int((end_dt - start_dt).total_seconds() // 3600) + 1)
761
+ step = {"1m": 1, "5m": 5, "15m": 15, "30m": 30}[interval]
762
+ return max(1, int((end_dt - start_dt).total_seconds() // 60) // step + 1)
763
+
764
+ def history(self, start, end=None, interval="1D"):
765
+ assert interval in INTERVAL_MAP, f"Unsupported interval: {interval}"
766
+ start_dt = dt.datetime.strptime(start, "%Y-%m-%d")
767
+ end_dt = (
768
+ dt.datetime.utcnow() + pd.Timedelta(days=1)
769
+ if end is None
770
+ else (dt.datetime.strptime(end, "%Y-%m-%d") + pd.Timedelta(days=1))
771
+ )
772
+ count_back = self._estimate_countback(start_dt, end_dt, interval)
773
+ payload = {
774
+ "timeFrame": INTERVAL_MAP[interval],
775
+ "symbols": [self.symbol],
776
+ "to": int(end_dt.timestamp()),
777
+ "countBack": count_back,
778
+ }
779
+ data = send_request(TRADING_URL + CHART_URL, method="POST", payload=payload)
780
+ arr = data[0] if isinstance(data, list) and data else []
781
+ if not arr:
782
+ return pd.DataFrame(
783
+ columns=["time", "open", "high", "low", "close", "volume"]
784
+ )
785
+
786
+ df = pd.DataFrame(arr)[OHLC_COLUMNS].rename(columns=OHLC_RENAME)
787
+ ts = pd.to_numeric(df["time"], errors="coerce")
788
+ df["time"] = pd.to_datetime(ts, unit="s")
789
+ df = df[df["time"] >= start_dt].reset_index(drop=True)
790
+ return df
791
+
792
+ def intraday(self, page_size=100, last_time=None):
793
+ url = f"{TRADING_URL}{INTRADAY_URL}/LEData/getAll"
794
+ payload = {
795
+ "symbol": self.symbol,
796
+ "limit": int(page_size),
797
+ "truncTime": last_time,
798
+ }
799
+ data = send_request(url, method="POST", payload=payload)
800
+ if not data:
801
+ return pd.DataFrame(columns=list(INTRADAY_MAP.values()))
802
+
803
+ df = pd.DataFrame(data)
804
+ cols = list(INTRADAY_MAP.keys())
805
+ df = df[cols].rename(columns=INTRADAY_MAP)
806
+
807
+ vals = pd.to_numeric(df["time"], errors="coerce")
808
+ if vals.notna().any():
809
+ unit = "ms" if vals.dropna().astype("int64").gt(10**12).any() else "s"
810
+ df["time"] = pd.to_datetime(vals, unit=unit)
811
+ else:
812
+ df["time"] = pd.to_datetime(df["time"], errors="coerce")
813
+
814
+ return df
815
+
816
+ def price_depth(self):
817
+ data = send_request(
818
+ PRICE_DEPTH_URL, method="POST", payload={"symbol": self.symbol}
819
+ )
820
+ if not data:
821
+ return pd.DataFrame(
822
+ columns=[
823
+ "price",
824
+ "acc_volume",
825
+ "acc_buy_volume",
826
+ "acc_sell_volume",
827
+ "acc_undefined_volume",
828
+ ]
829
+ )
830
+ df = pd.DataFrame(data)
831
+ df = df[
832
+ [
833
+ "priceStep",
834
+ "accumulatedVolume",
835
+ "accumulatedBuyVolume",
836
+ "accumulatedSellVolume",
837
+ "accumulatedUndefinedVolume",
838
+ ]
839
+ ]
840
+ return df.rename(
841
+ columns={
842
+ "priceStep": "price",
843
+ "accumulatedVolume": "acc_volume",
844
+ "accumulatedBuyVolume": "acc_buy_volume",
845
+ "accumulatedSellVolume": "acc_sell_volume",
846
+ "accumulatedUndefinedVolume": "acc_undefined_volume",
847
+ }
848
+ )
849
+
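A usage sketch for the VCI quote wrapper (endpoints come from .const and are assumed reachable):

from quantvn.stocks import Quote  # hypothetical import path

quote = Quote("HPG")
daily = quote.history(start="2024-01-02", end="2024-06-28", interval="1D")
ticks = quote.intraday(page_size=500)    # latest matched orders
depth = quote.price_depth()              # accumulated volume per price step
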
850
+
851
+ # ===================== Reorganized: Global quotes (MSN/Yahoo) =====================
852
+
853
+ MSN_HEADERS = {
854
+ "User-Agent": (
855
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
856
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
857
+ "Chrome/123.0.0.0 Safari/537.36"
858
+ ),
859
+ "Accept": "application/json, text/plain, */*",
860
+ "Accept-Language": "en-US,en;q=0.9",
861
+ "Origin": "https://www.msn.com",
862
+ "Referer": "https://www.msn.com/en-us/money",
863
+ }
864
+
865
+ CURRENCY_ID = {
866
+ "USDVND": "avyufr",
867
+ "JPYVND": "ave8sm",
868
+ "EURUSD": "av932w",
869
+ "USDCNY": "avym77",
870
+ "USDKRW": "avyoyc",
871
+ }
872
+ CRYPTO_ID = {
873
+ "BTC": "c2111",
874
+ "ETH": "c2112",
875
+ "USDT": "c2115",
876
+ "BNB": "c2113",
877
+ "ADA": "c2114",
878
+ "SOL": "c2116",
879
+ }
880
+ INDICES_ID = {
881
+ "DJI": "a6qja2",
882
+ "INX": "a33k6h",
883
+ "COMP": "a3oxnm",
884
+ "N225": "a9j7bh",
885
+ "VNI": "aqk2nm",
886
+ }
887
+ Y_INDICES = {
888
+ "DJI": "^DJI",
889
+ "INX": "^GSPC",
890
+ "COMP": "^IXIC",
891
+ "N225": "^N225",
892
+ "VNI": "^VNINDEX",
893
+ }
894
+ Y_CRYPTO = {
895
+ "BTC": "BTC-USD",
896
+ "ETH": "ETH-USD",
897
+ "BNB": "BNB-USD",
898
+ "ADA": "ADA-USD",
899
+ "SOL": "SOL-USD",
900
+ }
901
+
902
+
903
+ def _normalize_df_global(df: pd.DataFrame) -> pd.DataFrame:
904
+ for col in ["time", "open", "high", "low", "close", "volume"]:
905
+ if col not in df.columns:
906
+ df[col] = None
907
+ return df[["time", "open", "high", "low", "close", "volume"]].reset_index(drop=True)
908
+
909
+
910
+ def _chart_msn(symbol_id, start=None, end=None, interval="1D") -> pd.DataFrame:
911
+ BASE = "https://assets.msn.com/service/Finance"
912
+ url = f"{BASE}/Charts/TimeRange"
913
+ params = {
914
+ "ids": symbol_id,
915
+ "type": "All",
916
+ "timeframe": 1,
917
+ "wrapodata": "false",
918
+ "ocid": "finance-utils-peregrine",
919
+ "cm": "en-us",
920
+ "it": "web",
921
+ "scn": "ANON",
922
+ }
923
+ data = send_request(url, params=params, headers=MSN_HEADERS)
924
+ series = None
925
+ if isinstance(data, list) and data and isinstance(data[0], dict):
926
+ series = data[0].get("series") or (
927
+ data[0].get("charts", [{}])[0].get("series")
928
+ if data[0].get("charts")
929
+ else None
930
+ )
931
+ elif isinstance(data, dict):
932
+ series = data.get("series") or (
933
+ data.get("charts", [{}])[0].get("series") if data.get("charts") else None
934
+ )
935
+ if not series:
936
+ return pd.DataFrame(columns=["time", "open", "high", "low", "close", "volume"])
937
+ if isinstance(series, list):
938
+ df = pd.DataFrame(series)
939
+ else:
940
+ df = pd.DataFrame([series])
941
+ rename = {
942
+ "timeStamps": "time",
943
+ "openPrices": "open",
944
+ "pricesHigh": "high",
945
+ "pricesLow": "low",
946
+ "prices": "close",
947
+ "volumes": "volume",
948
+ }
949
+ df.rename(
950
+ columns={k: v for k, v in rename.items() if k in df.columns}, inplace=True
951
+ )
952
+ for col in ["time", "open", "high", "low", "close", "volume"]:
953
+ if (
954
+ col in df.columns
955
+ and df[col].apply(lambda x: isinstance(x, (list, tuple))).any()
956
+ ):
957
+ df = df.explode(col)
958
+ df["time"] = pd.to_numeric(df.get("time"), errors="coerce")
959
+ df["time"] = pd.to_datetime(df["time"], unit="s", errors="coerce")
960
+ return _normalize_df_global(df)
961
+
962
+
963
+ def _yahoo_symbol(kind: str, symbol: str) -> list[str]:
964
+ if kind == "fx":
965
+ return [f"{symbol}=X", f"{symbol[:3]}{symbol[3:]}=X"]
966
+ if kind == "crypto":
967
+ return [Y_CRYPTO.get(symbol, f"{symbol}-USD")]
968
+ if kind == "index":
969
+ return [Y_INDICES.get(symbol, symbol)]
970
+ return [symbol]
971
+
972
+
973
+ def _interval_map_yahoo(interval: str) -> tuple[str, str]:
974
+ if interval in ("1m", "5m", "15m", "30m", "60m", "1H"):
975
+ return ("1mo", "1m")
976
+ if interval in ("1W",):
977
+ return ("6mo", "1d")
978
+ if interval in ("1M",):
979
+ return ("2y", "1d")
980
+ return ("1y", "1d")
981
+
982
+
983
+ def _chart_yahoo(
984
+ kind: str, symbol: str, start=None, end=None, interval="1D"
985
+ ) -> pd.DataFrame:
986
+ rng, itv = _interval_map_yahoo(interval)
987
+ for ysym in _yahoo_symbol(kind, symbol):
988
+ try:
989
+ url = f"https://query1.finance.yahoo.com/v8/finance/chart/{ysym}"
990
+ params = {
991
+ "range": rng,
992
+ "interval": itv,
993
+ "includePrePost": "false",
994
+ "events": "div,splits",
995
+ }
996
+ data = send_request(
997
+ url,
998
+ params=params,
999
+ headers={
1000
+ "User-Agent": MSN_HEADERS["User-Agent"],
1001
+ "Accept": "application/json, text/plain, */*",
1002
+ },
1003
+ )
1004
+ res = (data or {}).get("chart", {}).get("result", [])
1005
+ if not res:
1006
+ continue
1007
+ r0 = res[0]
1008
+ ts = r0.get("timestamp", []) or r0.get("meta", {}).get(
1009
+ "regularTradingPeriod", []
1010
+ )
1011
+ ind = r0.get("indicators", {})
1012
+ q = (ind.get("quote") or [{}])[0]
1013
+ df = pd.DataFrame(
1014
+ {
1015
+ "time": pd.to_datetime(ts, unit="s", errors="coerce"),
1016
+ "open": q.get("open"),
1017
+ "high": q.get("high"),
1018
+ "low": q.get("low"),
1019
+ "close": q.get("close"),
1020
+ "volume": q.get("volume"),
1021
+ }
1022
+ )
1023
+ if start:
1024
+ df = df[df["time"] >= pd.to_datetime(start)]
1025
+ if end:
1026
+ df = df[df["time"] <= pd.to_datetime(end)]
1027
+ if not df.empty:
1028
+ return _normalize_df_global(df)
1029
+ except Exception:
1030
+ continue
1031
+ return pd.DataFrame(columns=["time", "open", "high", "low", "close", "volume"])
1032
+
1033
+
1034
+ class _Wrap:
1035
+ def __init__(self, id_map, kind: str):
1036
+ self.id_map = id_map
1037
+ self.kind = kind
1038
+
1039
+ class _Quote:
1040
+ def __init__(self, sid, kind, raw_symbol):
1041
+ self.sid = sid
1042
+ self.kind = kind
1043
+ self.raw_symbol = raw_symbol
1044
+
1045
+ def history(self, start, end, interval="1D"):
1046
+ try:
1047
+ df = _chart_msn(self.sid, start, end, interval)
1048
+ if df is not None and not df.empty:
1049
+ return df
1050
+ except Exception:
1051
+ pass
1052
+ return _chart_yahoo(self.kind, self.raw_symbol, start, end, interval)
1053
+
1054
+ def __call__(self, symbol):
1055
+ sid = self.id_map.get(symbol)
1056
+ return type("Obj", (), {"quote": self._Quote(sid, self.kind, symbol)})()
1057
+
1058
+
1059
+ class FX:
1060
+ def __init__(self):
1061
+ self._wrap = _Wrap(CURRENCY_ID, "fx")
1062
+
1063
+ def __call__(self, symbol):
1064
+ return self._wrap(symbol)
1065
+
1066
+
1067
+ class Crypto:
1068
+ def __init__(self):
1069
+ self._wrap = _Wrap(CRYPTO_ID, "crypto")
1070
+
1071
+ def __call__(self, symbol):
1072
+ return self._wrap(symbol)
1073
+
1074
+
1075
+ class WorldIndex:
1076
+ def __init__(self):
1077
+ self._wrap = _Wrap(INDICES_ID, "index")
1078
+
1079
+ def __call__(self, symbol):
1080
+ return self._wrap(symbol)
1081
+
1082
+
1083
+ class Global:
1084
+ def fx(self, symbol):
1085
+ return FX()(symbol)
1086
+
1087
+ def crypto(self, symbol):
1088
+ return Crypto()(symbol)
1089
+
1090
+ def world_index(self, symbol):
1091
+ return WorldIndex()(symbol)
1092
+
1093
+
1094
+ class MSN(Global):
1095
+ pass
1096
+
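A usage sketch for the global-quote facade (MSN is tried first, Yahoo is the fallback; symbol ids are looked up in CURRENCY_ID / CRYPTO_ID / INDICES_ID):

from quantvn.stocks import Global  # hypothetical import path

g = Global()
usdvnd = g.fx("USDVND").quote.history(start="2024-01-01", end="2024-06-30")
btc = g.crypto("BTC").quote.history(start="2024-01-01", end="2024-06-30")
spx = g.world_index("INX").quote.history(start="2024-01-01", end="2024-06-30")
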
1097
+
1098
+ # ===================== Reorganized: Trading =====================
1099
+
1100
+ _PRICEBOARD_QUERY = """
1101
+ query PriceBoard($tickers:[String!]){
1102
+ priceBoard(tickers:$tickers){
1103
+ ticker open_price ceiling_price floor_price reference_price
1104
+ highest_price lowest_price price_change percent_price_change
1105
+ foreign_total_volume foreign_total_room foreign_holding_room
1106
+ average_match_volume2_week
1107
+ }
1108
+ }
1109
+ """
1110
+
1111
+
1112
+ class Trading:
1113
+ @staticmethod
1114
+ def _fallback(symbols):
1115
+ rows = []
1116
+ now = pd.Timestamp.utcnow()
1117
+ start = (now - pd.Timedelta(days=10)).strftime("%Y-%m-%d")
1118
+ end = now.strftime("%Y-%m-%d")
1119
+ for sym in symbols:
1120
+ try:
1121
+ q = Quote(sym)
1122
+ tick = q.intraday(page_size=1)
1123
+ price = float(tick["price"].iloc[0]) if not tick.empty else None
1124
+ hist = q.history(start=start, end=end, interval="1D")
1125
+ if len(hist) >= 2:
1126
+ ref = float(hist["close"].iloc[-2])
1127
+ elif len(hist) == 1:
1128
+ ref = float(hist["close"].iloc[-1])
1129
+ else:
1130
+ ref = None
1131
+ change = (
1132
+ (price - ref) if (price is not None and ref is not None) else None
1133
+ )
1134
+ pct = (
1135
+ (change / ref * 100.0)
1136
+ if (change is not None and ref not in (None, 0))
1137
+ else None
1138
+ )
1139
+ rows.append(
1140
+ {
1141
+ "symbol": sym,
1142
+ "open": None,
1143
+ "ceiling": None,
1144
+ "floor": None,
1145
+ "ref_price": ref,
1146
+ "high": None,
1147
+ "low": None,
1148
+ "price_change": change,
1149
+ "price_change_pct": pct,
1150
+ "foreign_volume": None,
1151
+ "foreign_room": None,
1152
+ "foreign_holding_room": None,
1153
+ "avg_match_volume_2w": None,
1154
+ }
1155
+ )
1156
+ except Exception:
1157
+ rows.append(
1158
+ {
1159
+ "symbol": sym,
1160
+ "open": None,
1161
+ "ceiling": None,
1162
+ "floor": None,
1163
+ "ref_price": None,
1164
+ "high": None,
1165
+ "low": None,
1166
+ "price_change": None,
1167
+ "price_change_pct": None,
1168
+ "foreign_volume": None,
1169
+ "foreign_room": None,
1170
+ "foreign_holding_room": None,
1171
+ "avg_match_volume_2w": None,
1172
+ }
1173
+ )
1174
+ return pd.DataFrame(rows)
1175
+
1176
+ @staticmethod
1177
+ def price_board(symbols):
1178
+ payload = {
1179
+ "operationName": "PriceBoard",
1180
+ "query": _PRICEBOARD_QUERY,
1181
+ "variables": {"tickers": list(symbols)},
1182
+ }
1183
+ try:
1184
+ data = send_request(
1185
+ GRAPHQL_URL,
1186
+ method="POST",
1187
+ headers={"Content-Type": "application/json"},
1188
+ payload=payload,
1189
+ )
1190
+ rows = (data or {}).get("data", {}).get("priceBoard", [])
1191
+ if rows:
1192
+ df = pd.DataFrame(rows).rename(columns=PRICE_INFO_MAP)
1193
+ return df
1194
+ except Exception:
1195
+ pass
1196
+ return Trading._fallback(symbols)
1197
+
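A usage sketch: the GraphQL price board is tried first and the Quote-based fallback fills in if it fails (live endpoints assumed):

from quantvn.stocks import Trading  # hypothetical import path

board = Trading.price_board(["HPG", "VNM", "VIC"])
print(board.head())   # column names depend on which path answered
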
1198
+
1199
+ # ===================== Public: get_hist =====================
1200
+
1201
+
1202
+ def get_hist(asset_name: str, resolution: str = "m") -> pd.DataFrame:
1203
+ """
1204
+ Fetch the full OHLCV history from the Entrade API, with pivot/fill logic to handle missing data.
1205
+
1206
+ Parameters:
1207
+ -----------
1208
+ asset_name : str
1209
+ Stock ticker (e.g. "HPG", "VNM")
1210
+ resolution : str
1211
+ Time frame: "m" (1 minute), "h" (1 hour), "1H", "1D", etc.
1212
+ Default: "m"
1213
+
1214
+ Returns:
1215
+ --------
1216
+ DataFrame with columns: ["Date", "time", "Open", "High", "Low", "Close", "volume"]
1217
+ """
1218
+ if not isinstance(asset_name, str) or not asset_name.strip():
1219
+ raise ValueError("asset_name phải là chuỗi hợp lệ (ví dụ: 'HPG').")
1220
+
1221
+ # Map resolution to Entrade format
1222
+ res_map = {
1223
+ "m": "1", # 1 minute
1224
+ "h": "1H", # 1 hour
1225
+ "1h": "1H",
1226
+ "1d": "1D",
1227
+ "d": "1D",
1228
+ }
1229
+ res = res_map.get((resolution or "m").lower(), resolution)
1230
+ symbol = asset_name.strip().upper()
1231
+
1232
+ # Fetch the raw data from Entrade (single request)
1233
+ df = _fetch_entrade_data(symbol, res)
1234
+
1235
+ if df.empty:
1236
+ return pd.DataFrame(
1237
+ columns=["Date", "time", "Open", "High", "Low", "Close", "volume"]
1238
+ )
1239
+
1240
+ # Convert timestamps to datetimes in UTC+7 (Vietnam time)
1241
+ df["t"] = (
1242
+ df["t"]
1243
+ .astype(int)
1244
+ .apply(lambda x: dt.datetime.utcfromtimestamp(x) + dt.timedelta(hours=7))
1245
+ )
1246
+
1247
+ # Rename the columns
1248
+ df.columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
1249
+
1250
+ # Split into Date and time parts
1251
+ df["time"] = df["Date"].astype(str).str[11:] # HH:MM:SS
1252
+ df["Date"] = df["Date"].astype(str).str[:10] # YYYY-MM-DD
1253
+
1254
+ # Pivot Close as Date x time to fill missing data
1255
+ close_pivot = df.pivot(index="Date", columns="time", values="Close")
1256
+ close_pivot = close_pivot.dropna(axis=1, thresh=len(close_pivot) - 30)
1257
+ close_pivot = close_pivot.ffill(axis=1)
1258
+ close_stacked = close_pivot.stack().reset_index()
1259
+ close_stacked.columns = ["Date", "time", "Close"]
1260
+
1261
+ # Pivot Volume as Date x time
1262
+ volume_pivot = df.pivot(index="Date", columns="time", values="Volume")
1263
+ volume_pivot = volume_pivot.dropna(axis=1, thresh=len(volume_pivot) - 30)
1264
+ volume_pivot = volume_pivot.ffill(axis=1)
1265
+ volume_stacked = volume_pivot.stack().reset_index()
1266
+
1267
+ # Combine Close and Volume
1268
+ close_stacked["volume"] = volume_stacked.iloc[:, 2]
1269
+
1270
+ # Merge with Open, High, Low from the original data
1271
+ merged = pd.merge(
1272
+ close_stacked[["Date", "time", "Close", "volume"]],
1273
+ df[["Date", "time", "Open", "High", "Low"]],
1274
+ on=["Date", "time"],
1275
+ how="left",
1276
+ )
1277
+
1278
+ # Order the columns and forward fill any remaining gaps
1279
+ ohlc = merged[["Date", "time", "Open", "High", "Low", "Close", "volume"]].ffill()
1280
+
1281
+ return ohlc
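
Finally, a usage sketch of the public helpers (assuming the module is importable as quantvn.stocks and that a Config API key is set up for list_liquid_asset):

from quantvn.stocks import get_hist, list_liquid_asset  # hypothetical import path

minute_bars = get_hist("HPG")                  # 1-minute bars, pivot/ffill applied
daily_bars = get_hist("HPG", resolution="1D")  # daily bars
print(minute_bars.columns.tolist())
# ['Date', 'time', 'Open', 'High', 'Low', 'Close', 'volume']

liquid = list_liquid_asset()                   # needs the Lambda URL and x-api-key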