MultiFactor 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multifactor-0.2.7 → multifactor-0.2.9}/PKG-INFO +1 -1
- {multifactor-0.2.7 → multifactor-0.2.9}/pyproject.toml +1 -1
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/us_stockinfo.py +46 -5
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor.egg-info/PKG-INFO +1 -1
- {multifactor-0.2.7 → multifactor-0.2.9}/README.md +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/setup.cfg +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/__init__.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/core.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/momentum.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/momentum_one.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/quality.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/quality_one.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/score.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/score_adj_weight.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/stockinfo.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/us_core.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/us_momentum.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/us_quality.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/us_value.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/value.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor/value_one.py +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor.egg-info/SOURCES.txt +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor.egg-info/dependency_links.txt +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor.egg-info/requires.txt +0 -0
- {multifactor-0.2.7 → multifactor-0.2.9}/src/MultiFactor.egg-info/top_level.txt +0 -0
|
@@ -3,6 +3,30 @@ import FinanceDataReader as fdr
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
|
+
# Root tickers of share-class stocks that are written with a hyphen separator
# (e.g. BRK-B). Only these roots are rewritten when the separator is missing,
# so ordinary tickers that merely end in A/B (ALB, APA, BA, FOXA, NWSA, ...)
# are never split by mistake.
_HYPHEN_CLASS_ROOTS = frozenset({'BRK', 'BF', 'LEN', 'STZ', 'HEI', 'JW', 'MOG'})


def clean_us_symbol(symbol):
    """Normalize a US ticker symbol to its hyphenated share-class form.

    The goal is yfinance compatibility and consistent join keys when
    merging data sets that spell the same ticker differently.

    Examples:
        BRK.B, BRK/B, BRKB -> BRK-B
        BF.B,  BF/B,  BFB  -> BF-B

    Args:
        symbol: Ticker symbol. Non-string values are returned unchanged.

    Returns:
        The stripped, upper-cased symbol with '/' and '.' replaced by '-'.
        If the result is exactly one of the known class-share roots followed
        by 'A' or 'B' with no separator, a hyphen is inserted before the
        class letter.
    """
    if not isinstance(symbol, str):
        return symbol

    symbol = symbol.strip().upper().replace('/', '-').replace('.', '-')

    # Split off the last character and test it as a share-class letter.
    # Slicing (rather than indexing) keeps the empty string safe.
    root, share_class = symbol[:-1], symbol[-1:]
    if share_class in ('A', 'B') and root in _HYPHEN_CLASS_ROOTS:
        return f"{root}-{share_class}"

    return symbol
|
|
29
|
+
|
|
6
30
|
def get_us_stockinfo(N=500):
|
|
7
31
|
"""
|
|
8
32
|
미국 종목정보 추출 (GitHub 시가총액 데이터 + FDR 상세정보 병합)
|
|
@@ -12,6 +36,7 @@ def get_us_stockinfo(N=500):
|
|
|
12
36
|
try:
|
|
13
37
|
# 1. GitHub에서 최신 S&P 500 데이터 로드 (시가총액 등 순위용)
|
|
14
38
|
df = pd.read_csv(url)
|
|
39
|
+
df['symbol'] = df['symbol'].astype(str).apply(clean_us_symbol)
|
|
15
40
|
|
|
16
41
|
# 2. FDR에서 KR/US 전체 상장 종목 정보 로드 (명칭 일관성 유지)
|
|
17
42
|
df_sp500 = fdr.StockListing('S&P500')
|
|
@@ -20,11 +45,27 @@ def get_us_stockinfo(N=500):
|
|
|
20
45
|
|
|
21
46
|
# 3. FDR 데이터 병합 및 중복 제거
|
|
22
47
|
fdr_stocks = pd.concat([df_sp500, df_nasdaq, df_nyse])
|
|
48
|
+
fdr_stocks['Symbol'] = fdr_stocks['Symbol'].astype(str).apply(clean_us_symbol)
|
|
23
49
|
fdr_stocks = fdr_stocks.drop_duplicates(subset='Symbol')
|
|
24
50
|
|
|
25
|
-
#
|
|
26
|
-
|
|
27
|
-
|
|
51
|
+
# FDR Sector 이름을 GitHub CSV의 industry 대분류 형태로 변환하기 위한 맵핑
|
|
52
|
+
sector_mapping = {
|
|
53
|
+
'Information Technology': 'Technology',
|
|
54
|
+
'Financials': 'Finance',
|
|
55
|
+
'Materials': 'Basic Materials',
|
|
56
|
+
'Communication Services': 'Telecommunications'
|
|
57
|
+
}
|
|
58
|
+
fdr_stocks['Sector_mapped'] = fdr_stocks['Sector'].map(sector_mapping).fillna(fdr_stocks['Sector'])
|
|
59
|
+
|
|
60
|
+
# 4. Symbol 기준으로 조인하여 FDR의 Name 및 맵핑된 Sector(대분류)를 가져옴
|
|
61
|
+
df = df.merge(
|
|
62
|
+
fdr_stocks[['Symbol', 'Name', 'Sector_mapped']],
|
|
63
|
+
left_on='symbol', right_on='Symbol',
|
|
64
|
+
how='left'
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# GitHub CSV의 industry 컬럼에 결측치(NaN)가 있으면 FDR에서 맵핑한 Sector로 보완
|
|
68
|
+
df['industry'] = df['industry'].fillna(df['Sector_mapped'])
|
|
28
69
|
|
|
29
70
|
# 5. 컬럼명을 기존 시스템과 동일하게 변경 (GitHub의 name 대신 FDR의 Name 사용)
|
|
30
71
|
column_map = {
|
|
@@ -56,9 +97,9 @@ def get_us_stockinfo(N=500):
|
|
|
56
97
|
if 'Symbol' in df.columns:
|
|
57
98
|
df = df.rename(columns={'Symbol': 'Code'})
|
|
58
99
|
|
|
59
|
-
# 7. 종목코드 클렌징 (yfinance 호환을 위해
|
|
100
|
+
# 7. 종목코드 클렌징 (yfinance 호환을 위해 표준화 적용)
|
|
60
101
|
if 'Code' in df.columns:
|
|
61
|
-
df['Code'] = df['Code'].str.
|
|
102
|
+
df['Code'] = df['Code'].astype(str).apply(clean_us_symbol)
|
|
62
103
|
|
|
63
104
|
# 8. 상위 N개 추출 및 인덱스 초기화
|
|
64
105
|
df = df.head(N).reset_index(drop=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|