neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurostats_API/__init__.py +1 -1
- neurostats_API/fetchers/balance_sheet.py +152 -102
- neurostats_API/fetchers/base.py +93 -74
- neurostats_API/fetchers/cash_flow.py +143 -113
- neurostats_API/fetchers/finance_overview.py +28 -28
- neurostats_API/fetchers/institution.py +211 -97
- neurostats_API/fetchers/margin_trading.py +121 -94
- neurostats_API/fetchers/month_revenue.py +139 -105
- neurostats_API/fetchers/profit_lose.py +203 -108
- neurostats_API/fetchers/tech.py +117 -42
- neurostats_API/fetchers/tej_finance_report.py +248 -338
- neurostats_API/fetchers/value_invest.py +32 -12
- neurostats_API/tools/company_list/tw.json +2175 -0
- neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -3
- neurostats_API/tools/tej_db/tej_db_skip_index.yaml +14 -1
- neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -5
- neurostats_API/utils/__init__.py +0 -1
- neurostats_API/utils/calculate_value.py +102 -1
- neurostats_API/utils/data_process.py +53 -19
- neurostats_API/utils/logger.py +21 -0
- {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/METADATA +2 -2
- neurostats_API-0.0.23.dist-info/RECORD +35 -0
- neurostats_API/utils/fetcher.py +0 -1056
- neurostats_API-0.0.21b0.dist-info/RECORD +0 -34
- /neurostats_API/tools/{balance_sheet.yaml → twse/balance_sheet.yaml} +0 -0
- /neurostats_API/tools/{cash_flow_percentage.yaml → twse/cash_flow_percentage.yaml} +0 -0
- /neurostats_API/tools/{finance_overview_dict.yaml → twse/finance_overview_dict.yaml} +0 -0
- /neurostats_API/tools/{profit_lose.yaml → twse/profit_lose.yaml} +0 -0
- /neurostats_API/tools/{seasonal_data_field_dict.txt → twse/seasonal_data_field_dict.txt} +0 -0
- {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/WHEEL +0 -0
- {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,10 @@ import importlib.resources as pkg_resources
|
|
3
3
|
import json
|
4
4
|
import numpy as np
|
5
5
|
import pandas as pd
|
6
|
-
from ..utils import StatsDateTime, StatsProcessor
|
6
|
+
from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
|
7
7
|
import yaml
|
8
8
|
|
9
9
|
|
10
|
-
|
11
10
|
class ProfitLoseFetcher(StatsFetcher):
|
12
11
|
"""
|
13
12
|
iFa.ai: 財務分析 -> 損益表
|
@@ -16,143 +15,239 @@ class ProfitLoseFetcher(StatsFetcher):
|
|
16
15
|
def __init__(self, ticker, db_client):
    """
    Set up the income-statement fetcher for one ticker.

    Args:
        ticker: ticker symbol, forwarded to the parent fetcher.
        db_client: database client, forwarded to the parent fetcher.
    """
    super().__init__(ticker, db_client)

    # Table layout used by process_data_twse() to slice the final tables.
    self.table_settings = StatsProcessor.load_yaml("twse/profit_lose.yaml")

    # Collection name -> processing routine; query_data() falls back to the
    # US routine for any collection that is not listed here.
    self.process_function_map = dict(
        twse_stats=self.process_data_twse,
        us_stats=self.process_data_us,
    )

    # Keys that _get_empty_structure() fills with empty DataFrames.
    # NOTE(review): 'grand_total_income_percentage' breaks the naming pattern
    # of its neighbours ('grand_total_net_income_percentage' would match) --
    # confirm against twse/profit_lose.yaml before renaming.
    self.return_keys = [
        'profit_lose',
        'grand_total_profit_lose',
        'revenue',
        'grand_total_revenue',
        'gross_profit',
        'grand_total_gross_profit',
        'gross_profit_percentage',
        'grand_total_gross_profit_percentage',
        'operating_income',
        'grand_total_operating_income',
        'operating_income_percentage',
        'grand_total_operating_income_percentage',
        'net_income_before_tax',
        'grand_total_net_income_before_tax',
        'net_income_before_tax_percentage',
        'grand_total_net_income_before_tax_percentage',
        'net_income',
        'grand_total_net_income',
        'net_income_percentage',
        'grand_total_income_percentage',
        'EPS',
        'EPS_growth',
        'grand_total_EPS',
        'grand_total_EPS_growth',
        'profit_lose_all',
        'profit_lose_YoY',
    ]
|
34
|
+
|
35
|
+
def prepare_query(self):
    """
    Extend the parent aggregation pipeline with a projection stage.

    The added ``$project`` stage keeps ``ticker`` and ``company_name`` and
    rewrites every ``seasonal_data`` entry to ``{year, season, data}``,
    where ``data`` is the per-season income-statement sub-document
    (``profit_lose`` for ``twse_stats``, ``income_statement`` otherwise).
    Missing arrays / sub-documents are replaced by ``[]`` via ``$ifNull``.

    Returns:
        list: the parent pipeline followed by the projection stage
        (a new list; the parent's list is not modified).
    """
    base_pipeline = super().prepare_query()

    # Field inside each season entry that holds the statement.
    if self.collection_name == "twse_stats":
        chart_name = "profit_lose"
    else:
        chart_name = "income_statement"

    season_projection = {
        "year": "$$season.year",
        "season": "$$season.season",
        "data": {"$ifNull": [f"$$season.{chart_name}", []]},
    }
    project_stage = {
        "$project": {
            "_id": 0,
            "ticker": 1,
            "company_name": 1,
            "seasonal_data": {
                "$map": {
                    "input": {"$ifNull": ["$seasonal_data", []]},
                    "as": "season",
                    "in": season_projection,
                }
            },
        }
    }

    return base_pipeline + [project_stage]
|
68
66
|
|
69
|
-
|
67
|
+
def collect_data(self):
    """Return whatever the parent fetcher's ``collect_data()`` returns."""
    fetched = super().collect_data()
    return fetched
|
70
69
|
|
71
70
|
def query_data(self):
    """
    Fetch the ticker's document and run the matching processing routine.

    Only the first element returned by ``collect_data()`` is used. The
    routine is looked up in ``self.process_function_map`` by
    ``self.collection_name``; unknown collections use ``process_data_us``.

    Returns:
        Whatever the selected processing routine returns.
    """
    first_document = self.collect_data()[0]

    handler = self.process_function_map.get(
        self.collection_name, self.process_data_us
    )
    return handler(first_document)
|
85
79
|
|
86
|
-
def
|
80
|
+
def process_data_twse(self, fetched_data):
    """
    Turn one fetched TWSE document into the income-statement result tables.

    Args:
        fetched_data: the single document handed over by ``query_data()``;
            ``company_name`` is read directly and ``seasonal_data`` is read
            with a default of ``[]``. Each season entry is indexed by
            ``year``, ``season`` and ``data``.

    Returns:
        dict: ``ticker`` and ``company_name`` plus
        - one entry per key of ``self.table_settings`` for which
          ``StatsProcessor.slice_table`` succeeded,
        - ``profit_lose`` (value/percentage columns without grand totals),
        - ``profit_lose_all`` (every period),
        - ``profit_lose_YoY`` (only periods ending in the target quarter).
        When there is no seasonal data, the entries from
        ``_get_empty_structure()`` are returned instead.
    """

    latest_time = StatsDateTime.get_latest_time(
        self.ticker, self.collection
    ).get('last_update_time', {})

    # Use the latest recorded data period; if it is unavailable, default to
    # the end of last year.
    # NOTE(review): target_year is computed but never used below.
    target_year = latest_time.get('seasonal_data', {}).get(
        'latest_target_year',
        StatsDateTime.get_today().year - 1
    )
    target_season = latest_time.get('seasonal_data',
                                    {}).get('latest_season', 4)

    return_dict = {
        "ticker": self.ticker,
        "company_name": fetched_data['company_name'],
    }

    seasonal_data = fetched_data.get('seasonal_data', [])

    # Nothing to process: answer with empty placeholder tables.
    if (not seasonal_data):
        return_dict.update(self._get_empty_structure())
        return return_dict

    # Key every season's statement by a "<year>Q<season>" label.
    profit_lose_dict = {
        f"{data['year']}Q{data['season']}": data['data']
        for data in seasonal_data
    }

    profit_lose_dict = YoY_Calculator.cal_QoQ(profit_lose_dict)
    profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict)
    # Keep only the periods that fall in the target quarter.
    target_season_col = profit_lose_df.columns.str.endswith(
        f"Q{target_season}"
    )
    profit_lose_df = profit_lose_df.loc[:, target_season_col]

    old_profit_lose_df = StatsProcessor.expand_value_percentage(
        profit_lose_df
    )
    # OLD: the returned table contains value & percentage
    value_col = old_profit_lose_df.columns.str.endswith(f"_value")
    percentage_col = old_profit_lose_df.columns.str.endswith(f"_percentage")
    # OLD: the returned table excludes grand_total
    grand_total_value_col = old_profit_lose_df.columns.str.endswith(
        f"grand_total_value"
    )
    grand_total_percentage_col = old_profit_lose_df.columns.str.endswith(
        f"grand_total_percentage"
    )

    old_profit_lose_df = old_profit_lose_df.loc[:, (
        (value_col & ~grand_total_value_col) |
        (percentage_col & ~grand_total_percentage_col)
    )]

    # Flatten each period's nested statement in place (values are replaced
    # for existing keys only, so iterating .items() here is safe).
    for time_index, data_dict in profit_lose_dict.items():
        profit_lose_dict[time_index] = self.flatten_dict(
            value_dict=data_dict,
            indexes=list(data_dict.keys()),
            target_keys=[
                "value", "growth", "percentage", "grand_total",
                "grand_total_percentage"
            ] + [f"YoY_{i}" for i in [1, 3, 5, 10]] +
            [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
        )

    # After the transpose: one row per period, one column per flattened item.
    # NOTE(review): DataFrame.map (used below) needs pandas >= 2.1.
    profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict).T
    # EPS values are expressed in 元 (not in thousands)
    eps_index = (
        profit_lose_df.columns.str.endswith("_value")
        & profit_lose_df.columns.str.contains("每股盈餘")
    )
    eps_copy = profit_lose_df.loc[:, eps_index].copy()
    eps_mask_index = eps_copy.columns
    profit_lose_df[eps_mask_index] = profit_lose_df[eps_mask_index].map(
        lambda x: StatsProcessor.cal_non_percentage(x, postfix="元")
    )

    # percentage handling (columns ending in "percentage" or "growth")
    percentage_index = profit_lose_df.columns.str.endswith("percentage")
    growth_index = profit_lose_df.columns.str.endswith("growth")
    percentage_mask = (percentage_index | growth_index)
    percentage_copy = profit_lose_df.loc[:, percentage_mask]
    percentage_mask_index = percentage_copy.columns

    profit_lose_df[percentage_mask_index] = profit_lose_df[
        percentage_mask_index].map(
            lambda x: StatsProcessor.
            cal_non_percentage(x, to_str=True, postfix="%")
        )

    # YoY handling: multiply by 100 (per the original note; done inside
    # StatsProcessor.cal_percentage)
    YoY_index = profit_lose_df.columns.str.contains("YoY")
    YoY_mask = YoY_index
    YoY_copy = profit_lose_df.loc[:, YoY_mask]
    YoY_mask_cols = YoY_copy.columns

    profit_lose_df[YoY_mask_cols] = profit_lose_df[YoY_mask_cols].map(
        lambda x: StatsProcessor.cal_percentage(x)
    )

    # Everything that is left: expressed in thousands (千元)
    value_index = ~(
        percentage_index | growth_index | YoY_index | eps_index
    )  # every index other than the ones handled above

    value_col = profit_lose_df.loc[:, value_index].columns
    profit_lose_df[value_col] = profit_lose_df[value_col].map(
        lambda x: StatsProcessor.cal_non_percentage(x, postfix="千元")
    )

    # Back to items-as-rows / periods-as-columns, with "N/A" turned into None.
    total_table = profit_lose_df.replace("N/A", None).T

    # Select the target quarter only
    target_season_columns = total_table.columns.str.endswith(
        f"Q{target_season}"
    )
    total_table_YoY = total_table.loc[:, target_season_columns]

    for name, setting in self.table_settings.items():
        target_indexes = setting.get('target_index', [None])
        # Try each candidate index name; the first one that slices wins.
        for target_index in target_indexes:
            try:
                return_dict[name] = StatsProcessor.slice_table(
                    total_table=total_table_YoY,
                    mode=setting['mode'],
                    target_index=target_index
                )
                break
            except Exception as e:
                # NOTE(review): failures are swallowed silently; if every
                # candidate fails, `name` is simply absent from the result.
                continue

    return_dict.update(
        {
            "profit_lose": old_profit_lose_df,
            "profit_lose_all": total_table.copy(),
            "profit_lose_YoY": total_table_YoY
        }
    )
    return return_dict
|
221
|
+
|
222
|
+
def process_data_us(self, fetched_data):
    """
    Build the quarterly income-statement table for a US ticker.

    ``query_data()`` hands over a single document shaped by
    ``prepare_query()``: ``{"ticker", "company_name", "seasonal_data":
    [{"year", "season", "data"}, ...]}``. The previous implementation
    iterated that document as if it were a list of rows carrying a
    ``'profit_lose'`` key, which fails on the projected shape. Both shapes
    are accepted here so that any caller still passing the list form keeps
    working.

    Args:
        fetched_data: either the projected document (dict) or a list of
            per-season rows with ``year``, ``season``, ``company_name`` and
            the statement under ``data`` or ``profit_lose``.

    Returns:
        dict: ``ticker``, ``company_name`` and ``profit_lose`` (a DataFrame
        with one column per ``"<year>Q<season>"`` period).

    Raises:
        IndexError: if the list form is passed and it is empty.
        KeyError: if a row lacks ``year``/``season``/statement data, or the
            document lacks ``company_name``.
    """
    if isinstance(fetched_data, dict):
        # Shape produced by prepare_query() + query_data().
        company_name = fetched_data['company_name']
        seasons = fetched_data.get('seasonal_data') or []
    else:
        # Legacy shape: a sequence of per-season rows.
        seasons = list(fetched_data)
        company_name = seasons[-1]['company_name']

    table_dict = {
        f"{season['year']}Q{season['season']}":
        season['data'] if 'data' in season else season['profit_lose']
        for season in seasons
    }

    # Add quarter-over-quarter growth, then year-over-year figures.
    table_dict = YoY_Calculator.cal_QoQ(table_dict)
    table_dict = YoY_Calculator.cal_YoY(table_dict)

    # Flatten each period's nested statement; values are replaced for
    # existing keys only, so iterating .items() here is safe.
    for time_index, data_dict in table_dict.items():
        table_dict[time_index] = self.flatten_dict(
            value_dict=data_dict,
            indexes=list(data_dict.keys()),
            target_keys=["value", "growth"] +
            [f"YoY_{i}" for i in [1, 3, 5, 10]]
        )

    return_dict = {
        "ticker": self.ticker,
        "company_name": company_name,
        "profit_lose": pd.DataFrame.from_dict(table_dict)
    }

    return return_dict
|
249
|
+
|
250
|
+
def _get_empty_structure(self):
|
251
|
+
return {
|
252
|
+
key: pd.DataFrame(columns= pd.Index([], name = 'date')) for key in self.return_keys
|
253
|
+
}
|
neurostats_API/fetchers/tech.py
CHANGED
@@ -12,7 +12,10 @@ class TechFetcher(StatsFetcher):
|
|
12
12
|
"""
|
13
13
|
|
14
14
|
super().__init__(ticker, db_client)
|
15
|
-
|
15
|
+
if (ticker in self.tw_company_list.keys()):
|
16
|
+
self.twse_collection = self.db['twse_stats']
|
17
|
+
self.tej_collection = self.db["TWN/APIPRCD"]
|
18
|
+
|
16
19
|
self.full_ohlcv = self._get_ohlcv()
|
17
20
|
self.basic_indexes = [
|
18
21
|
'SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20', 'EMA40', 'EMA12',
|
@@ -51,53 +54,47 @@ class TechFetcher(StatsFetcher):
|
|
51
54
|
|
52
55
|
required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
|
53
56
|
|
54
|
-
|
55
|
-
|
56
|
-
if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
|
57
|
-
full_tick = f'^{self.ticker}'
|
58
|
-
else:
|
59
|
-
full_tick = f'{self.ticker}.tw'
|
60
|
-
|
57
|
+
if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
|
58
|
+
full_tick = f'^{self.ticker}'
|
61
59
|
df = self.conduct_yf_search(full_tick)
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
print(f".tw failed, try .two")
|
66
|
-
|
67
|
-
full_tick = f'{self.ticker}.two'
|
68
|
-
|
69
|
-
df = self.conduct_yf_search(full_tick)
|
61
|
+
return df[required_cols]
|
70
62
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
"mdate", "open_d", 'high_d', 'low_d', 'close_d', 'vol'
|
63
|
+
elif(self.ticker in self.tw_company_list.keys()):
|
64
|
+
search_fns = [
|
65
|
+
self.conduct_db_search_twse,
|
66
|
+
self.conduct_db_search_tej,
|
67
|
+
lambda: self.conduct_yf_search(f'{self.ticker}.tw'),
|
68
|
+
lambda: self.conduct_yf_search(f'{self.ticker}.two')
|
78
69
|
]
|
79
|
-
tej_name_proj = {
|
80
|
-
tej_name: org_name
|
81
|
-
for tej_name, org_name in zip(tej_required_cols, required_cols)
|
82
|
-
}
|
83
|
-
|
84
|
-
query = {'ticker': self.ticker}
|
85
|
-
ticker_full = self.collection.find_one(query)
|
86
|
-
|
87
|
-
if not ticker_full:
|
88
|
-
raise ValueError("No ticker found in database")
|
89
|
-
|
90
|
-
daily_data = ticker_full.get("data", [])
|
91
|
-
if not isinstance(daily_data, list):
|
92
|
-
raise TypeError("Expected 'daily_data' to be a list.")
|
93
|
-
|
94
|
-
df = pd.DataFrame(daily_data)
|
95
70
|
|
96
|
-
|
97
|
-
|
98
|
-
|
71
|
+
for search_method in search_fns:
|
72
|
+
try:
|
73
|
+
df = search_method()
|
74
|
+
break
|
75
|
+
except (KeyError, ValueError, TypeError):
|
76
|
+
continue
|
77
|
+
else:
|
78
|
+
return pd.DataFrame(columns=required_cols)
|
79
|
+
|
80
|
+
# break跳出後
|
81
|
+
return df[required_cols]
|
82
|
+
else: # 美股
|
83
|
+
search_fns = [
|
84
|
+
self.conduct_db_search_us,
|
85
|
+
lambda : self.conduct_yf_search(f"{self.ticker}")
|
86
|
+
]
|
87
|
+
for search_method in search_fns:
|
88
|
+
try:
|
89
|
+
df = search_method()
|
90
|
+
break
|
91
|
+
except (KeyError, ValueError, TypeError):
|
92
|
+
continue
|
93
|
+
else:
|
94
|
+
df = pd.DataFrame()
|
99
95
|
|
100
|
-
|
96
|
+
return df
|
97
|
+
|
101
98
|
|
102
99
|
def get_daily(self):
|
103
100
|
|
@@ -141,7 +138,85 @@ class TechFetcher(StatsFetcher):
|
|
141
138
|
)
|
142
139
|
|
143
140
|
return df
|
141
|
+
|
142
|
+
def conduct_db_search_tej(self):
    """
    Load daily OHLCV rows for the ticker from the TEJ-style document.

    The document's ``data`` list is read and its TEJ column names are
    renamed to the standard ``date/open/high/low/close/volume`` set.

    Returns:
        pandas.DataFrame: the six standard columns, in that order.

    Raises:
        ValueError: no document was found for the ticker.
        TypeError: the document's ``data`` field is not a list.
        KeyError: one or more TEJ columns are missing.
    """
    # TEJ column name -> standard column name (insertion order is the
    # output column order).
    tej_to_standard = {
        "mdate": 'date',
        "open_d": 'open',
        'high_d': 'high',
        'low_d': 'low',
        'close_d': 'close',
        'vol': 'volume',
    }

    # NOTE(review): this reads self.collection although __init__ also sets
    # self.tej_collection for Taiwanese tickers -- confirm which is intended.
    document = self.collection.find_one({'ticker': self.ticker})
    if not document:
        raise ValueError("No ticker found in database")

    records = document.get("data", [])
    if not isinstance(records, list):
        raise TypeError("Expected 'daily_data' to be a list.")

    frame = pd.DataFrame(records)
    if not self.has_required_columns(frame, list(tej_to_standard)):
        raise KeyError("Missing required columns")

    renamed = frame.rename(columns=tej_to_standard)
    return renamed[list(tej_to_standard.values())]
|
171
|
+
|
172
|
+
def conduct_db_search_us(self):
    """
    Load daily OHLCV rows for a US ticker from ``self.collection``.

    Any of the standard columns absent from the stored rows is added and
    filled with ``pd.NA``. The previous code joined the missing names into
    one string and then looped over its characters, so the real columns
    were never created and the final selection raised ``KeyError``.

    Returns:
        pandas.DataFrame: columns ``date, open, high, low, close, volume``
        in that order.

    Raises:
        ValueError: no document was found, or its ``daily_data`` is empty
            (raising keeps the caller's fallback chain going, as the old
            accidental ``KeyError`` did for empty data).
        TypeError: ``daily_data`` is not a list.
    """
    required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']

    query = {'ticker': self.ticker}
    filter_query = {"daily_data": 1, "_id": 0}
    ticker_full = self.collection.find_one(query, filter_query)

    if not ticker_full:
        raise ValueError("No ticker found in database")

    daily_data = ticker_full.get("daily_data", [])
    if not isinstance(daily_data, list):
        raise TypeError("Expected 'daily_data' to be a list.")
    if not daily_data:
        raise ValueError("No daily data found in database")

    df = pd.DataFrame(daily_data)

    # Fill in whichever standard columns the stored rows do not provide.
    for col in required_cols:
        if col not in df.columns:
            df[col] = pd.NA

    return df[required_cols]
|
195
|
+
|
196
|
+
|
197
|
+
def conduct_db_search_twse(self):
    """
    Load daily OHLCV rows for the ticker from ``self.twse_collection``.

    Rows are sorted by ``date`` and reduced to one row per date. The sort
    is stable, so when a date occurs more than once the row stored first
    is the one that is kept (the default sort gave no such guarantee).

    Returns:
        pandas.DataFrame: columns ``date, open, high, low, close, volume``,
        ordered by ``date`` with unique dates.

    Raises:
        ValueError: no document was found, or ``daily_data`` is not a list
            (the exception type is unchanged; only the message for the
            second case was corrected, it used to say "No ticker found").
        KeyError: one or more required columns are missing.
    """
    required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
    match_query = {"ticker": self.ticker}
    proj_query = {"_id": 0, "daily_data": 1}

    full_data = self.twse_collection.find_one(match_query, proj_query)

    if not full_data:
        raise ValueError("No ticker found in database twse_stats")

    daily_data = full_data.get("daily_data", [])

    if not isinstance(daily_data, list):
        raise ValueError(
            "Expected 'daily_data' to be a list in database twse_stats"
        )

    df = pd.DataFrame(daily_data)
    if not self.has_required_columns(df, required_cols):
        raise KeyError("Missing required columns")

    df = df[required_cols]
    # mergesort is stable: equal dates keep their stored order, which makes
    # the duplicate that survives drop_duplicates deterministic.
    df = df.sort_values(by='date', kind='mergesort').drop_duplicates(
        subset=['date']
    )

    return df
|
145
220
|
|
146
221
|
class TechProcessor:
|
147
222
|
|