db2_hj3415 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- db2_hj3415-0.1.0/PKG-INFO +22 -0
- db2_hj3415-0.1.0/README.md +9 -0
- db2_hj3415-0.1.0/db2_hj3415/__init__.py +0 -0
- db2_hj3415-0.1.0/db2_hj3415/cli/__init__.py +0 -0
- db2_hj3415-0.1.0/db2_hj3415/cli/db.py +0 -0
- db2_hj3415-0.1.0/db2_hj3415/common/__init__.py +0 -0
- db2_hj3415-0.1.0/db2_hj3415/common/connection.py +17 -0
- db2_hj3415-0.1.0/db2_hj3415/common/db_ops.py +5 -0
- db2_hj3415-0.1.0/db2_hj3415/common/utils.py +1 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/__init__.py +2 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/_ops.py +117 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/aud.py +23 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/chf.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/gbond3y.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/gold.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/kosdaq.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/kospi.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/silver.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/sp500.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/usdidx.py +21 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/usdkrw.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/mi/wti.py +26 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/__init__.py +2 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/_c10346.py +198 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/_ops.py +42 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/c101.py +124 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/c103.py +38 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/c104.py +38 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/c106.py +38 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/c108.py +77 -0
- db2_hj3415-0.1.0/db2_hj3415/nfs/dart.py +0 -0
- db2_hj3415-0.1.0/db2_hj3415/redis/__init__.py +0 -0
- db2_hj3415-0.1.0/pyproject.toml +26 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: db2_hj3415
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Gathering the stock data by playwright
|
5
|
+
Author-email: Hyungjin Kim <hj3415@gmail.com>
|
6
|
+
Description-Content-Type: text/markdown
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
8
|
+
Requires-Dist: motor
|
9
|
+
Requires-Dist: pandas
|
10
|
+
Requires-Dist: deepdiff
|
11
|
+
Requires-Dist: utils_hj3415>=3.2.3
|
12
|
+
Project-URL: Home, https://www.hyungjin.kr
|
13
|
+
|
14
|
+
### db2-hj3415
|
15
|
+
|
16
|
+
#### Introduction
|
17
|
+
hj3415를 위한 mongo 데이터베이스 패키지
|
18
|
+
|
19
|
+
#### Notice
|
20
|
+
패키지내의 각 폴더는 db명, 파일은 컬렉션 단위로 구성한다.
|
21
|
+
redis 폴더는 레디스 캐시 관련 모듈
|
22
|
+
common 폴더는 공통 유틸 함수
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# MongoDB/Redis 연결
|
2
|
+
import os
|
3
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
4
|
+
|
5
|
+
# 싱글톤 몽고 클라이언트 정의
|
6
|
+
MONGO_URI = os.getenv("MONGO_ADDR", "mongodb://localhost:27017")
|
7
|
+
client: AsyncIOMotorClient = None
|
8
|
+
|
9
|
+
def get_mongo_client() -> AsyncIOMotorClient:
    """Return the shared Motor client, creating it on first use.

    The client is built from ``MONGO_URI`` and cached in the module-level
    ``client`` variable, so later calls return the same instance.
    """
    global client
    if client is not None:
        return client
    client = AsyncIOMotorClient(MONGO_URI)
    return client
|
14
|
+
|
15
|
+
def close_mongo_client():
    """Close the cached Motor client, if there is one, and forget it.

    The module-level ``client`` is reset to ``None`` after closing so that a
    later ``get_mongo_client()`` call builds a fresh client instead of
    returning the one that was just closed.
    """
    global client
    if client is not None:
        client.close()
        client = None
|
@@ -0,0 +1 @@
|
|
1
|
+
# 자주 쓰는 간단한 유틸 함수
|
@@ -0,0 +1,117 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
from datetime import datetime
|
3
|
+
from pymongo import UpdateOne
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
from db2_hj3415.mi import DB_NAME, DATE_FORMAT
|
7
|
+
from db2_hj3415.common.db_ops import get_collection
|
8
|
+
|
9
|
+
from utils_hj3415 import setup_logger
|
10
|
+
|
11
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
12
|
+
|
13
|
+
|
14
|
+
async def _save_one_collection(collection_name: str, doc: dict, client: AsyncIOMotorClient):
    """Upsert a single document into ``collection_name``, keyed by its date.

    The "날짜" value of ``doc`` is parsed with ``DATE_FORMAT`` and used as the
    unique key of the collection. A document without a date is skipped.
    Any exception is reported on stdout and not re-raised.

    Args:
        collection_name: Target collection inside ``DB_NAME``.
        doc: Document to store, holding its date as a string under "날짜".
            The caller's dict is left untouched.
        client: Motor client used to reach the database.
    """
    try:
        collection = get_collection(client, DB_NAME, collection_name)
        date_str = doc.get("날짜")
        if not date_str:
            print(f"{collection_name}: 날짜 없음, 저장 건너뜀")
            return

        date_obj = datetime.strptime(date_str, DATE_FORMAT)
        # Put the parsed date on a copy: overwriting the caller's string would
        # make a second save of the same dict fail inside strptime.
        to_store = {**doc, "날짜": date_obj}
        mylogger.debug(f"{collection_name} - 원본 날짜 문자열: {date_str}")

        await collection.create_index("날짜", unique=True)

        result = await collection.update_one(
            {"날짜": date_obj},
            {"$set": to_store},
            upsert=True,
        )

        status = "삽입" if result.upserted_id else "업데이트"
        print(f"{collection_name}: {status}")

    except Exception as e:
        print(f"{collection_name}: 오류 - {e}")
|
40
|
+
|
41
|
+
|
42
|
+
async def save(data: dict[str, dict], client: AsyncIOMotorClient):
    """Store every market-index document contained in ``data``.

    ``data`` maps a collection name to the document for that collection.
    Entries are saved one after another, in the dict's iteration order.
    """
    for name in data:
        await _save_one_collection(name, data[name], client)
|
48
|
+
|
49
|
+
|
50
|
+
async def find(col: str, date_str: str, client: AsyncIOMotorClient):
    """Look up the document stored for ``date_str`` in collection ``col``.

    Args:
        col: Collection name inside ``DB_NAME``.
        date_str: Date to look up, written in ``DATE_FORMAT``.
        client: Motor client used to reach the database.

    Returns:
        The matching document, or ``None`` when no document has that date.
        The outcome is also printed to stdout.

    Raises:
        ValueError: If ``date_str`` does not match ``DATE_FORMAT``.
    """
    collection = get_collection(client, DB_NAME, col)
    date_obj = datetime.strptime(date_str, DATE_FORMAT)
    doc = await collection.find_one({"날짜": date_obj})
    if doc:
        # Only touch doc["날짜"] once we know a document came back; the debug
        # line used to run first and crashed on a miss (doc is None).
        mylogger.debug("%s 날짜 타입 확인: %s %r", col, doc["날짜"], doc["날짜"])
        print(f"조회 결과 ({col}): {doc}")
    else:
        print(f"{col} 컬렉션에 {date_str} 날짜 데이터 없음")
    return doc
|
59
|
+
|
60
|
+
|
61
|
+
async def delete(col: str, date_str: str, client: AsyncIOMotorClient):
    """Delete the document dated ``date_str`` from collection ``col``.

    ``date_str`` is parsed with ``DATE_FORMAT``. Whether a document was
    actually removed is reported on stdout; nothing is returned.
    """
    collection = get_collection(client, DB_NAME, col)
    target_date = datetime.strptime(date_str, DATE_FORMAT)
    outcome = await collection.delete_one({"날짜": target_date})
    if not outcome.deleted_count > 0:
        print(f"{col}: 삭제할 데이터 없음")
        return
    print(f"{col}: {date_str} 데이터 삭제 완료")
|
69
|
+
|
70
|
+
|
71
|
+
async def _save_market_history_type1(df: pd.DataFrame, market: str, numeric_columns: list | None, client: AsyncIOMotorClient):
    """Bulk-upsert a history table into the ``market`` collection, one row per date.

    Column names are stripped of surrounding whitespace, the "날짜" column is
    parsed with ``DATE_FORMAT`` as UTC, and the columns named in
    ``numeric_columns`` are converted with ``pd.to_numeric`` (unparseable
    values become NaN). Each row is then upserted by its date.

    Args:
        df: History rows; must contain a "날짜" column. Not modified.
        market: Collection name inside ``DB_NAME``.
        numeric_columns: Columns to convert to numbers, or ``None`` for none.
        client: Motor client used to reach the database.

    Returns:
        ``{"inserted": int, "updated": int}``; both are 0 when the frame is
        empty, the dates cannot be parsed, or there is nothing to write.
    """
    if df.empty:
        print("빈 데이터프레임입니다.")
        return {"inserted": 0, "updated": 0}

    collection = client[DB_NAME][market]

    # Normalise a copy so the caller's DataFrame keeps its original columns
    # and dtypes.
    df = df.copy()
    df.columns = df.columns.str.strip()

    try:
        df["날짜"] = pd.to_datetime(df["날짜"], format=DATE_FORMAT, utc=True)
    except Exception as e:
        print(f"날짜 파싱 실패: {e}")
        return {"inserted": 0, "updated": 0}

    for col in numeric_columns or []:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    records = df.to_dict(orient="records")

    await collection.create_index("날짜", unique=True)

    operations = []
    for r in records:
        if "날짜" not in r:
            print("날짜 필드 없음 - 건너뜀:", r)
            continue
        operations.append(UpdateOne({"날짜": r["날짜"]}, {"$set": r}, upsert=True))

    if not operations:
        print("실행할 작업이 없습니다.")
        return {"inserted": 0, "updated": 0}

    result = await collection.bulk_write(operations)
    print(f"{market}: upsert 완료 - 삽입 {result.upserted_count}, 수정 {result.modified_count}")
    return {"inserted": result.upserted_count, "updated": result.modified_count}
|
117
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "aud"
|
10
|
+
|
11
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
13
|
+
|
14
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
16
|
+
|
17
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
19
|
+
|
20
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
23
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "chf"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "gbond3y"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "gold"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "kosdaq"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["체결가", "전일비", "거래량(천주)", "거래대금(백만)"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "kospi"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["체결가", "전일비", "거래량(천주)", "거래대금(백만)"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "silver"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "sp500"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비", "시가", "고가", "저가"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
|
4
|
+
from utils_hj3415 import setup_logger
|
5
|
+
|
6
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
7
|
+
|
8
|
+
COL_NAME = "usdidx"
|
9
|
+
|
10
|
+
|
11
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
13
|
+
|
14
|
+
|
15
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
17
|
+
|
18
|
+
|
19
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
21
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "usdkrw"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["매매기준율", "전일대비", "현찰로 사실 때", "현찰로 파실 때", "송금 보내실 때", "송금 받으실 때"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
from db2_hj3415.mi import _ops
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "wti"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(data: dict, client: AsyncIOMotorClient):
    """Save one document to the ``COL_NAME`` collection via ``_ops._save_one_collection``."""
    outcome = await _ops._save_one_collection(COL_NAME, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def find(date_str: str, client: AsyncIOMotorClient):
    """Look up the ``COL_NAME`` document for ``date_str`` via ``_ops.find``."""
    outcome = await _ops.find(COL_NAME, date_str, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def delete(date_str: str, client: AsyncIOMotorClient):
    """Delete the ``COL_NAME`` document for ``date_str`` via ``_ops.delete``."""
    outcome = await _ops.delete(COL_NAME, date_str, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def save_history(df: pd.DataFrame, client: AsyncIOMotorClient):
    """Bulk-save a history DataFrame into the ``COL_NAME`` collection.

    Delegates to ``_ops._save_market_history_type1`` with this market's
    numeric columns and returns its result (previously discarded), so
    callers can see the insert/update counts.
    """
    numeric_columns = ["종가", "전일대비"]
    return await _ops._save_market_history_type1(df, COL_NAME, numeric_columns, client)
|
@@ -0,0 +1,198 @@
|
|
1
|
+
from deepdiff import DeepDiff
|
2
|
+
from pymongo import ASCENDING, DESCENDING
|
3
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
4
|
+
import pprint
|
5
|
+
import pandas as pd
|
6
|
+
import json
|
7
|
+
from db2_hj3415.nfs import DB_NAME
|
8
|
+
from db2_hj3415.common.db_ops import get_collection
|
9
|
+
from datetime import datetime, timezone
|
10
|
+
|
11
|
+
from utils_hj3415 import setup_logger
|
12
|
+
|
13
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
14
|
+
|
15
|
+
|
16
|
+
async def _compare_and_log_diff(code: str, new_doc: dict, latest_doc: dict | None, client: AsyncIOMotorClient) -> bool:
    """Compare a new document with the latest stored one and log any difference.

    The "_id" and "날짜" fields are left out of the comparison. When the
    documents differ, the diff is printed and a record is inserted into the
    "change_log" collection of ``DB_NAME``. Neither input dict is modified.

    Args:
        code: Stock code the documents belong to.
        new_doc: Newly built document.
        latest_doc: Most recent stored document, or ``None`` if there is none.
        client: Motor client used to write the change log.

    Returns:
        ``True`` if there is no stored document or the documents differ,
        ``False`` if they are identical.
    """
    if not latest_doc:
        return True

    ignored = ("_id", "날짜")
    old_view = {k: v for k, v in latest_doc.items() if k not in ignored}
    new_view = {k: v for k, v in new_doc.items() if k not in ignored}

    diff = DeepDiff(old_view, new_view, ignore_order=True)
    if not diff:
        print(f"{code} 기존 문서와 동일 - 저장하지 않음")
        return False

    print("변경된 항목:")
    for change_type, changes in diff.items():
        print(f"- {change_type}:")
        if isinstance(changes, dict):
            for path, value in changes.items():
                print(f" {path}: {value}")
        else:
            # Some change types (e.g. added/removed dictionary items) are a
            # plain collection of paths with no values; calling .items() on
            # them would raise AttributeError.
            for path in changes:
                print(f" {path}")

    await client[DB_NAME]["change_log"].insert_one({
        "코드": code,
        "변경시각": datetime.now(timezone.utc),
        "변경내용": json.loads(diff.to_json()),
    })

    return True
|
59
|
+
|
60
|
+
|
61
|
+
def _prepare_c10346_document(code: str, data: dict[str, pd.DataFrame]) -> dict:
    """Build the MongoDB document for one stock code from its page DataFrames.

    Each DataFrame is turned into a list of row dicts with every missing
    value (NaN/NaT/None) replaced by ``None``. Values in ``data`` that are
    not DataFrames are ignored. The input frames are not modified.

    Args:
        code: Stock code stored under "코드".
        data: Mapping of page name to that page's DataFrame.

    Returns:
        A dict holding "코드", "날짜" (the current UTC time) and one
        ``list[dict]`` entry per page, e.g.
        ``{"코드": "005930", "날짜": <datetime>, "재무상태표y": [...], ...}``.
    """
    document = {"코드": code, "날짜": datetime.now(timezone.utc)}
    for page, df in data.items():
        if not isinstance(df, pd.DataFrame):
            continue
        # Cast to object first: on a numeric column ``where(..., None)``
        # coerces None straight back to NaN, and NaN never compares equal
        # when the document is later diffed against the stored one.
        cleaned = df.astype(object).where(df.notna(), None)
        document[page] = cleaned.to_dict(orient="records")
    return document
|
87
|
+
|
88
|
+
|
89
|
+
async def save(col: str, code: str, data: dict[str, pd.DataFrame], client: AsyncIOMotorClient) -> dict:
    """Insert a new snapshot for ``code`` into ``col`` if its content changed.

    The snapshot is compared with the latest stored document for the code.
    When nothing changed, nothing is written. Otherwise the snapshot is
    inserted and only the two most recent documents for the code are kept.

    Returns:
        ``{"status": "unchanged"}`` or ``{"status": "inserted", "id": <str>}``.
    """
    collection = get_collection(client, DB_NAME, col)
    index_spec = [("코드", ASCENDING), ("날짜", ASCENDING)]
    await collection.create_index(index_spec, unique=True)

    document = _prepare_c10346_document(code, data)
    previous = await collection.find_one({"코드": code}, sort=[("날짜", DESCENDING)])

    changed = await _compare_and_log_diff(code, document, previous, client)
    if not changed:
        return {"status": "unchanged"}

    inserted = await collection.insert_one(document)
    print(f"삽입됨: {DB_NAME}.{collection.name} / {code} (id={inserted.inserted_id})")

    # Everything past the two newest documents for this code is removed.
    stale_docs = await collection.find({"코드": code}).sort("날짜", DESCENDING).skip(2).to_list(length=None)
    stale_ids = [stale["_id"] for stale in stale_docs]
    purge = await collection.delete_many({"_id": {"$in": stale_ids}})
    print(f"삭제된 이전 문서 수: {purge.deleted_count}")

    return {"status": "inserted", "id": str(inserted.inserted_id)}
|
111
|
+
|
112
|
+
|
113
|
+
async def save_many(col: str, many_data: dict[str, dict[str, pd.DataFrame]], client: AsyncIOMotorClient) -> list[dict]:
    """Run the change-aware snapshot save for every code in ``many_data``.

    For each code the new snapshot is compared with the latest stored
    document. Changed snapshots are inserted and documents beyond the two
    most recent are deleted. The per-code results are pretty-printed.

    Returns:
        One dict per code: ``{"code", "status": "unchanged"}`` or
        ``{"code", "status": "inserted", "id"}``.
    """
    collection = get_collection(client, DB_NAME, col)
    await collection.create_index([("코드", ASCENDING), ("날짜", ASCENDING)], unique=True)

    results = []
    for code in many_data:
        document = _prepare_c10346_document(code, many_data[code])
        previous = await collection.find_one({"코드": code}, sort=[("날짜", DESCENDING)])

        if not await _compare_and_log_diff(code, document, previous, client):
            results.append({"code": code, "status": "unchanged"})
            continue

        inserted = await collection.insert_one(document)

        # Keep only the two newest documents for this code.
        stale_docs = await collection.find({"코드": code}).sort("날짜", DESCENDING).skip(2).to_list(length=None)
        await collection.delete_many({"_id": {"$in": [stale["_id"] for stale in stale_docs]}})

        results.append({"code": code, "status": "inserted", "id": str(inserted.inserted_id)})

    pprint.pprint(results)
    return results
|
139
|
+
|
140
|
+
|
141
|
+
async def get_latest(col: str, code: str, page: str, client: AsyncIOMotorClient) -> pd.DataFrame | None:
    """Return one page of the newest document for ``code`` as a DataFrame.

    Returns ``None`` (after printing a notice) when the code has no document
    in ``col`` or the newest document has no ``page`` entry.
    """
    collection = get_collection(client, DB_NAME, col)

    # Newest first, one document only.
    newest = await collection.find_one({"코드": code}, sort=[("날짜", DESCENDING)])

    if newest and page in newest:
        # Stored pages are lists of row dicts.
        return pd.DataFrame(newest[page])

    print(f"문서 없음 또는 '{page}' 항목 없음")
    return None
|
158
|
+
|
159
|
+
|
160
|
+
async def has_doc_changed(col: str, code: str, client: AsyncIOMotorClient) -> bool:
    """Tell whether the two newest documents for ``code`` in ``col`` differ.

    With fewer than two documents there is nothing to compare and ``True``
    is returned. The comparison ignores the ``_id`` and ``날짜`` fields and is
    delegated to ``_compare_and_log_diff``, which records any difference in
    the change log.

    Args:
        col: Collection name (e.g. 'c103', 'c104', 'c106').
        code: Stock code (6-character string).
        client: Motor client used to reach the database.

    Returns:
        ``True`` if the documents differ or cannot be compared, else ``False``.
    """
    collection = get_collection(client, DB_NAME, col)

    # Two newest documents, newest first.
    docs = await collection.find({"코드": code}).sort("날짜", DESCENDING).limit(2).to_list(length=2)

    if len(docs) < 2:
        print(f"{code} 문서가 1개 이하임 - 비교 불가")
        return True  # nothing to compare against, so treat as changed

    newest, previous = docs[0], docs[1]
    for fetched in (newest, previous):
        fetched.pop("_id", None)
        fetched.pop("날짜", None)

    mylogger.debug(newest)
    mylogger.debug(previous)

    return await _compare_and_log_diff(code, newest, previous, client)
|
196
|
+
|
197
|
+
|
198
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
from db2_hj3415.nfs import DB_NAME
|
3
|
+
|
4
|
+
|
5
|
+
async def get_all_codes(client: AsyncIOMotorClient) -> list[str]:
    """Return the stock codes present in all of c103, c104 and c106.

    Args:
        client: Motor client used to reach the database.

    Returns:
        The codes common to the three collections, in no particular order.
    """
    db = client[DB_NAME]
    first, *others = ['c103', 'c104', 'c106']

    # Start from the first collection's codes and narrow with each other one.
    common = set(await db[first].distinct("코드"))
    for name in others:
        common &= set(await db[name].distinct("코드"))

    return list(common)
|
27
|
+
|
28
|
+
|
29
|
+
async def delete_code_from_all_collections(code: str, client: AsyncIOMotorClient) -> dict[str, int]:
    """Delete every document for ``code`` from c101, c103, c104, c106 and c108.

    Returns:
        Mapping of collection name to the number of documents deleted there.
        The mapping is also printed.
    """
    db = client[DB_NAME]
    deleted_counts = {}

    for name in ('c101', 'c103', 'c104', 'c106', 'c108'):
        outcome = await db[name].delete_many({"코드": code})
        deleted_counts[name] = outcome.deleted_count

    print(f"삭제된 도큐먼트 갯수: {deleted_counts}")
    return deleted_counts
|
42
|
+
|
@@ -0,0 +1,124 @@
|
|
1
|
+
from pymongo import ASCENDING, UpdateOne, DESCENDING
|
2
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
3
|
+
from datetime import datetime, timezone
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
from db2_hj3415.nfs import DATE_FORMAT, DB_NAME
|
7
|
+
from db2_hj3415.common.db_ops import get_collection
|
8
|
+
from utils_hj3415 import setup_logger
|
9
|
+
|
10
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
11
|
+
|
12
|
+
COL_NAME = "c101"
|
13
|
+
|
14
|
+
def _prepare_c101_document(doc: dict) -> dict | None:
|
15
|
+
"""
|
16
|
+
C101 컬렉션에 저장할 문서를 사전 처리합니다.
|
17
|
+
|
18
|
+
- '코드'와 '날짜' 필드가 없으면 None을 반환합니다.
|
19
|
+
- '날짜' 필드는 UTC 타임존을 포함한 datetime 객체로 변환합니다.
|
20
|
+
- 날짜 형식이 잘못된 경우 None을 반환합니다.
|
21
|
+
|
22
|
+
Parameters:
|
23
|
+
doc (dict): 원시 입력 문서 (예: 스크래핑 또는 파싱 결과)
|
24
|
+
|
25
|
+
Returns:
|
26
|
+
dict | None: 정상적으로 처리된 문서 또는 오류 시 None
|
27
|
+
"""
|
28
|
+
code = doc.get("코드")
|
29
|
+
date_str = doc.get("날짜")
|
30
|
+
|
31
|
+
if not code or not date_str:
|
32
|
+
print(f"코드 또는 날짜 누락: {code} / {date_str}")
|
33
|
+
return None
|
34
|
+
|
35
|
+
try:
|
36
|
+
doc["날짜"] = datetime.strptime(date_str, DATE_FORMAT).replace(tzinfo=timezone.utc)
|
37
|
+
except ValueError:
|
38
|
+
print(f"날짜 형식 오류 - 건너뜀: {code} / {date_str}")
|
39
|
+
return None
|
40
|
+
|
41
|
+
return doc
|
42
|
+
|
43
|
+
|
44
|
+
async def save(data: dict | None, client: AsyncIOMotorClient) -> dict:
    """Upsert one C101 document, keyed by its date and code.

    Returns:
        ``{"status": ...}`` where the status is ``"upserted <id>"`` for a new
        document, ``"modified"`` for a changed one and ``"unchanged"``
        otherwise (including missing or invalid input).
    """
    if not data:
        print("데이터 없음 - 저장 생략")
        return {"status": "unchanged"}

    collection = get_collection(client, DB_NAME, COL_NAME)
    await collection.create_index([("날짜", ASCENDING), ("코드", ASCENDING)], unique=True)

    prepared = _prepare_c101_document(data)
    if not prepared:
        return {"status": "unchanged"}

    key = {"날짜": prepared["날짜"], "코드": prepared["코드"]}
    outcome = await collection.update_one(key, {"$set": prepared}, upsert=True)

    if outcome.upserted_id:
        return {"status": f"upserted {outcome.upserted_id}"}
    if outcome.modified_count:
        return {"status": "modified"}
    return {"status": "unchanged"}
|
64
|
+
|
65
|
+
|
66
|
+
async def save_many(many_data: dict[str, dict | None], client: AsyncIOMotorClient) -> dict:
    """Upsert many C101 documents in a single bulk write.

    Entries whose document is empty or fails preparation are skipped.

    Returns:
        ``{"inserted": int, "updated": int}``; both are 0 when there was
        nothing to write.
    """
    collection = get_collection(client, DB_NAME, COL_NAME)
    await collection.create_index([("날짜", ASCENDING), ("코드", ASCENDING)], unique=True)

    operations = []
    for code, raw in many_data.items():
        if not raw:
            print(f"{code}: 데이터 없음 - 건너뜀")
            continue

        prepared = _prepare_c101_document(raw)
        if not prepared:
            continue

        key = {"날짜": prepared["날짜"], "코드": prepared["코드"]}
        operations.append(UpdateOne(key, {"$set": prepared}, upsert=True))

    if not operations:
        print("저장할 작업 없음")
        return {"inserted": 0, "updated": 0}

    outcome = await collection.bulk_write(operations)
    inserted, updated = outcome.upserted_count, outcome.modified_count
    print(f"저장 완료: inserted={inserted}, updated={updated}")
    return {"inserted": inserted, "updated": updated}
|
92
|
+
|
93
|
+
|
94
|
+
async def get_latest(code: str, client: AsyncIOMotorClient) -> dict | None:
    """Return the newest C101 document for ``code`` without its ``_id``.

    Returns ``None`` (after printing a notice) when the code has no document.
    """
    collection = get_collection(client, DB_NAME, COL_NAME)
    newest = await collection.find_one({"코드": code}, sort=[("날짜", DESCENDING)])

    if not newest:
        print(f"데이터 없음: {code}")
        return None

    newest.pop("_id", None)
    return newest
|
107
|
+
|
108
|
+
|
109
|
+
async def get_all_as_df(code: str, client: AsyncIOMotorClient) -> pd.DataFrame | None:
    """Return every C101 document for ``code`` as a DataFrame, oldest first.

    The ``_id`` field is dropped from each document. Returns ``None`` (after
    printing a notice) when the code has no documents.
    """
    collection = get_collection(client, DB_NAME, COL_NAME)
    docs = await collection.find({"코드": code}).sort("날짜", ASCENDING).to_list(length=None)

    if not docs:
        print(f"[{code}] 관련 문서 없음")
        return None

    # Drop the _id field from every row.
    for row in docs:
        row.pop("_id", None)

    return pd.DataFrame(docs)
|
124
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
from db2_hj3415.nfs import _c10346
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "c103"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(code: str, data: dict[str, pd.DataFrame], client: AsyncIOMotorClient) -> dict:
    """Save a snapshot for ``code`` to the ``COL_NAME`` collection via ``_c10346.save``."""
    outcome = await _c10346.save(COL_NAME, code, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def save_many(many_data: dict[str, dict[str, pd.DataFrame]], client: AsyncIOMotorClient) -> list[dict]:
    """Save snapshots for many codes to the ``COL_NAME`` collection via ``_c10346.save_many``."""
    outcome = await _c10346.save_many(COL_NAME, many_data, client)
    return outcome
|
18
|
+
|
19
|
+
|
20
|
+
async def get_latest(code: str, page: str, client: AsyncIOMotorClient) -> pd.DataFrame | None:
    """Return ``page`` of the newest ``COL_NAME`` document for ``code`` via ``_c10346.get_latest``."""
    outcome = await _c10346.get_latest(COL_NAME, code, page, client)
    return outcome
|
22
|
+
|
23
|
+
|
24
|
+
async def has_doc_changed(code: str, client: AsyncIOMotorClient) -> bool:
    """
    Compare the two most recent C103 documents of a stock code for changes.

    When fewer than two documents are available, True is returned and the
    data is treated as new.
    The comparison ignores the `_id` and `날짜` fields; any differences found
    are recorded in change_log.
    The work itself is done by `_c10346.has_doc_changed`.

    Args:
        code (str): Stock code (6-character string).
        client (AsyncIOMotorClient): Asynchronous MongoDB client instance.

    Returns:
        bool: Whether the document changed. True means it changed or could
            not be compared.
    """
    changed = await _c10346.has_doc_changed(COL_NAME, code, client)
    return changed
|
@@ -0,0 +1,38 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
from db2_hj3415.nfs import _c10346
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "c104"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(code: str, data: dict[str, pd.DataFrame], client: AsyncIOMotorClient) -> dict:
    """Save C104 data for one stock code.

    Thin wrapper: forwards to ``_c10346.save``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        code: Stock code the data belongs to.
        data: DataFrames keyed by string (presumably page names; confirm
            against ``_c10346.save``).
        client: Motor client used for the database call.

    Returns:
        Whatever dict ``_c10346.save`` returns.
    """
    outcome = await _c10346.save(COL_NAME, code, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def save_many(many_data: dict[str, dict[str, pd.DataFrame]], client: AsyncIOMotorClient) -> list[dict]:
    """Save C104 data for several entries in one call.

    Thin wrapper: forwards to ``_c10346.save_many``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        many_data: Mapping of string keys (presumably stock codes; confirm
            against ``_c10346.save_many``) to dicts of DataFrames.
        client: Motor client used for the database calls.

    Returns:
        Whatever list ``_c10346.save_many`` returns.
    """
    outcomes = await _c10346.save_many(COL_NAME, many_data, client)
    return outcomes
|
18
|
+
|
19
|
+
|
20
|
+
async def get_latest(code: str, page: str, client: AsyncIOMotorClient) -> pd.DataFrame | None:
    """Fetch the latest C104 data for a stock code and page.

    Thin wrapper: forwards to ``_c10346.get_latest``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        code: Stock code to look up.
        page: Page identifier, passed through unchanged.
        client: Motor client used for the database call.

    Returns:
        Whatever ``_c10346.get_latest`` returns (annotated as a DataFrame
        or ``None``).
    """
    latest = await _c10346.get_latest(COL_NAME, code, page, client)
    return latest
|
22
|
+
|
23
|
+
|
24
|
+
async def has_doc_changed(code: str, client: AsyncIOMotorClient) -> bool:
    """
    Compare the two most recent C104 documents of a stock code for changes.

    When fewer than two documents are available, True is returned and the
    data is treated as new.
    The comparison ignores the `_id` and `날짜` fields; any differences found
    are recorded in change_log.
    The work itself is done by `_c10346.has_doc_changed`.

    Args:
        code (str): Stock code (6-character string).
        client (AsyncIOMotorClient): Asynchronous MongoDB client instance.

    Returns:
        bool: Whether the document changed. True means it changed or could
            not be compared.
    """
    changed = await _c10346.has_doc_changed(COL_NAME, code, client)
    return changed
|
@@ -0,0 +1,38 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
from db2_hj3415.nfs import _c10346
|
5
|
+
from utils_hj3415 import setup_logger
|
6
|
+
|
7
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
8
|
+
|
9
|
+
COL_NAME = "c106"
|
10
|
+
|
11
|
+
|
12
|
+
async def save(code: str, data: dict[str, pd.DataFrame], client: AsyncIOMotorClient) -> dict:
    """Save C106 data for one stock code.

    Thin wrapper: forwards to ``_c10346.save``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        code: Stock code the data belongs to.
        data: DataFrames keyed by string (presumably page names; confirm
            against ``_c10346.save``).
        client: Motor client used for the database call.

    Returns:
        Whatever dict ``_c10346.save`` returns.
    """
    outcome = await _c10346.save(COL_NAME, code, data, client)
    return outcome
|
14
|
+
|
15
|
+
|
16
|
+
async def save_many(many_data: dict[str, dict[str, pd.DataFrame]], client: AsyncIOMotorClient) -> list[dict]:
    """Save C106 data for several entries in one call.

    Thin wrapper: forwards to ``_c10346.save_many``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        many_data: Mapping of string keys (presumably stock codes; confirm
            against ``_c10346.save_many``) to dicts of DataFrames.
        client: Motor client used for the database calls.

    Returns:
        Whatever list ``_c10346.save_many`` returns.
    """
    outcomes = await _c10346.save_many(COL_NAME, many_data, client)
    return outcomes
|
18
|
+
|
19
|
+
|
20
|
+
async def get_latest(code: str, page: str, client: AsyncIOMotorClient) -> pd.DataFrame | None:
    """Fetch the latest C106 data for a stock code and page.

    Thin wrapper: forwards to ``_c10346.get_latest``, passing this module's
    ``COL_NAME`` as the first argument.

    Args:
        code: Stock code to look up.
        page: Page identifier, passed through unchanged.
        client: Motor client used for the database call.

    Returns:
        Whatever ``_c10346.get_latest`` returns (annotated as a DataFrame
        or ``None``).
    """
    latest = await _c10346.get_latest(COL_NAME, code, page, client)
    return latest
|
22
|
+
|
23
|
+
|
24
|
+
async def has_doc_changed(code: str, client: AsyncIOMotorClient) -> bool:
    """
    Compare the two most recent C106 documents of a stock code for changes.

    When fewer than two documents are available, True is returned and the
    data is treated as new.
    The comparison ignores the `_id` and `날짜` fields; any differences found
    are recorded in change_log.
    The work itself is done by `_c10346.has_doc_changed`.

    Args:
        code (str): Stock code (6-character string).
        client (AsyncIOMotorClient): Asynchronous MongoDB client instance.

    Returns:
        bool: Whether the document changed. True means it changed or could
            not be compared.
    """
    changed = await _c10346.has_doc_changed(COL_NAME, code, client)
    return changed
|
@@ -0,0 +1,77 @@
|
|
1
|
+
from motor.motor_asyncio import AsyncIOMotorClient
|
2
|
+
from pymongo import ASCENDING, UpdateOne
|
3
|
+
import pandas as pd
|
4
|
+
from datetime import datetime, timezone
|
5
|
+
|
6
|
+
from db2_hj3415.nfs import DATE_FORMAT, DB_NAME
|
7
|
+
from db2_hj3415.common.db_ops import get_collection
|
8
|
+
from utils_hj3415 import setup_logger
|
9
|
+
|
10
|
+
mylogger = setup_logger(__name__, 'WARNING')
|
11
|
+
|
12
|
+
COL_NAME = 'c108'
|
13
|
+
|
14
|
+
|
15
|
+
async def save(code: str, data: pd.DataFrame, client: AsyncIOMotorClient) -> dict:
    """Upsert the C108 rows of one stock code.

    Every row of ``data`` becomes one upsert keyed by the triple
    ("코드", "날짜", "제목"). The "날짜" value is parsed with ``DATE_FORMAT``
    and stored as a UTC-aware ``datetime``. Rows that cannot be converted
    (missing column, unparsable date, ...) are reported and skipped instead
    of aborting the whole batch.

    Args:
        code: Stock code written into every document's "코드" field.
        data: Rows to store; must provide the "날짜" and "제목" columns.
        client: Motor client used to reach the database.

    Returns:
        ``{"status": "unchanged"}`` when ``data`` is ``None`` or empty.
        Otherwise ``{"inserted": ..., "updated": ..., "skipped": ...}``,
        where "inserted"/"updated" come from the bulk write result and
        "skipped" counts the rows that failed conversion.
    """
    if data is None or data.empty:
        print("데이터 없음 - 저장 생략")
        return {"status": "unchanged"}

    collection = get_collection(client, DB_NAME, COL_NAME)

    # Unique compound index over the same fields used as the upsert filter.
    await collection.create_index(
        [("코드", ASCENDING), ("날짜", ASCENDING), ("제목", ASCENDING)],
        unique=True
    )

    # NaN -> None. Cast to object first: on numeric columns pandas would
    # otherwise coerce the None straight back to NaN.
    df = data.astype(object).where(pd.notnull(data), None)
    operations = []
    inserted, updated, skipped = 0, 0, 0

    for _, row in df.iterrows():
        try:
            date_str = str(row["날짜"])
            date_obj = datetime.strptime(date_str, DATE_FORMAT).replace(tzinfo=timezone.utc)

            doc = row.to_dict()
            doc["코드"] = code
            doc["날짜"] = date_obj

            filter_ = {"코드": code, "날짜": date_obj, "제목": doc["제목"]}
            operations.append(UpdateOne(filter_, {"$set": doc}, upsert=True))
        except Exception as e:
            # Best effort: one bad row must not block the others, but it is
            # counted so callers can see that something was dropped.
            skipped += 1
            print(f"변환 에러 - {row.get('제목', '제목 없음')}: {e}")
            continue

    if operations:
        result = await collection.bulk_write(operations, ordered=False)
        inserted = result.upserted_count
        updated = result.modified_count
        print(f"[{code}] 저장 완료: inserted={inserted}, updated={updated}")
    else:
        print(f"[{code}] 저장할 작업 없음")

    return {"inserted": inserted, "updated": updated, "skipped": skipped}
|
56
|
+
|
57
|
+
|
58
|
+
async def save_many(many_data: dict[str, pd.DataFrame], client: AsyncIOMotorClient) -> dict:
    """Save the C108 rows of several stock codes and total the results.

    Codes are processed one after another through ``save``. A ``None``
    frame is reported and skipped; an exception raised for one code is
    recorded under "errors" and does not stop the remaining codes.

    Args:
        many_data: Mapping of stock code to its DataFrame (or ``None``).
        client: Motor client passed on to ``save``.

    Returns:
        A dict with the summed "inserted", "updated" and "skipped" counts
        plus an "errors" list of ``{"code": ..., "error": ...}`` entries.
    """
    counters = ("inserted", "updated", "skipped")
    summary = {"inserted": 0, "updated": 0, "skipped": 0, "errors": []}

    for code, frame in many_data.items():
        if frame is None:
            print(f"[{code}] 리포트 없음 - 건너뜀")
            continue

        try:
            outcome = await save(code, frame, client)
            # Keys missing from the per-code result count as zero.
            for key in counters:
                summary[key] += outcome.get(key, 0)
        except Exception as exc:
            print(f"[{code}] 저장 실패: {exc}")
            summary["errors"].append({"code": code, "error": str(exc)})

    return summary
|
76
|
+
|
77
|
+
|
File without changes
|
File without changes
|
@@ -0,0 +1,26 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["flit_core >=3.2,<4"]
|
3
|
+
build-backend = "flit_core.buildapi"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "db2_hj3415"
|
7
|
+
version = "0.1.0"
|
8
|
+
authors = [{name = "Hyungjin Kim", email = "hj3415@gmail.com"}]
|
9
|
+
description = "Gathering the stock data by playwright"
|
10
|
+
readme = "README.md"
|
11
|
+
classifiers = ["License :: OSI Approved :: MIT License"]
|
12
|
+
dependencies = [
|
13
|
+
"motor",
|
14
|
+
"pandas",
|
15
|
+
"deepdiff",
|
16
|
+
"utils_hj3415>=3.2.3",
|
17
|
+
]
|
18
|
+
|
19
|
+
[project.scripts]
|
20
|
+
db = "db2_hj3415.cli.db:main"
|
21
|
+
|
22
|
+
[project.urls]
|
23
|
+
Home = "https://www.hyungjin.kr"
|
24
|
+
|
25
|
+
[tool.flit.sdist]
|
26
|
+
exclude = ["tests/", ".gitignore",]
|