neurostats-API 0.0.21__tar.gz → 0.0.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurostats_api-0.0.22/MANIFEST.in +9 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/PKG-INFO +3 -3
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/README.md +1 -1
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/__init__.py +1 -1
- neurostats_api-0.0.22/neurostats_API/fetchers/balance_sheet.py +178 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/base.py +89 -74
- neurostats_api-0.0.22/neurostats_API/fetchers/cash_flow.py +200 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/finance_overview.py +2 -2
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/month_revenue.py +1 -1
- neurostats_api-0.0.22/neurostats_API/fetchers/profit_lose.py +233 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/tech.py +73 -33
- neurostats_api-0.0.22/neurostats_API/fetchers/tej_finance_report.py +361 -0
- neurostats_api-0.0.22/neurostats_API/tools/company_list/tw.json +2175 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_skip_index.yaml +3 -1
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -1
- neurostats_api-0.0.22/neurostats_API/tools/twse/balance_sheet.yaml +35 -0
- neurostats_api-0.0.22/neurostats_API/tools/twse/cash_flow_percentage.yaml +39 -0
- neurostats_api-0.0.22/neurostats_API/tools/twse/finance_overview_dict.yaml +185 -0
- neurostats_api-0.0.22/neurostats_API/tools/twse/profit_lose.yaml +143 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/__init__.py +0 -1
- neurostats_api-0.0.22/neurostats_API/utils/calculate_value.py +124 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/data_process.py +43 -15
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/PKG-INFO +3 -3
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/SOURCES.txt +6 -2
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/requires.txt +1 -1
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/setup.py +3 -4
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/test/test_fetchers.py +45 -22
- neurostats_api-0.0.21/MANIFEST.in +0 -2
- neurostats_api-0.0.21/neurostats_API/fetchers/balance_sheet.py +0 -151
- neurostats_api-0.0.21/neurostats_API/fetchers/cash_flow.py +0 -191
- neurostats_api-0.0.21/neurostats_API/fetchers/profit_lose.py +0 -158
- neurostats_api-0.0.21/neurostats_API/fetchers/tej_finance_report.py +0 -466
- neurostats_api-0.0.21/neurostats_API/utils/calculate_value.py +0 -26
- neurostats_api-0.0.21/neurostats_API/utils/fetcher.py +0 -1056
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/cli.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/__init__.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/institution.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/margin_trading.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/value_invest.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/main.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_index.yaml +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -0
- {neurostats_api-0.0.21/neurostats_API/tools → neurostats_api-0.0.22/neurostats_API/tools/twse}/seasonal_data_field_dict.txt +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/datetime.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/db_client.py +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/dependency_links.txt +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/top_level.txt +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/setup.cfg +0 -0
- {neurostats_api-0.0.21 → neurostats_api-0.0.22}/test/test_tej.py +0 -0
@@ -0,0 +1,9 @@
|
|
1
|
+
recursive-include neurostats_api/tools/tej_db *.yaml
|
2
|
+
recursive-include neurostats_api/tools/tej_db *.txt
|
3
|
+
recursive-include neurostats_api/tools/tej_db *.json
|
4
|
+
recursive-include neurostats_api/tools/twse *.yaml
|
5
|
+
recursive-include neurostats_api/tools/twse *.txt
|
6
|
+
recursive-include neurostats_api/tools/twse *.json
|
7
|
+
recursive-include neurostats_api/tools/company_list *.yaml
|
8
|
+
recursive-include neurostats_api/tools/company_list *.txt
|
9
|
+
recursive-include neurostats_api/tools/company_list *.json
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: neurostats_API
|
3
|
-
Version: 0.0.21
|
3
|
+
Version: 0.0.22
|
4
4
|
Summary: The service of NeuroStats website
|
5
5
|
Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
|
6
6
|
Author: JasonWang@Neurowatt
|
@@ -8,7 +8,7 @@ Author-email: jason@neurowatt.ai
|
|
8
8
|
Requires-Python: >=3.6
|
9
9
|
Description-Content-Type: text/markdown
|
10
10
|
Requires-Dist: numpy
|
11
|
-
Requires-Dist: pandas
|
11
|
+
Requires-Dist: pandas
|
12
12
|
Requires-Dist: pymongo
|
13
13
|
Requires-Dist: pytz
|
14
14
|
Requires-Dist: python-dotenv
|
@@ -89,7 +89,7 @@ pip install neurostats-API
|
|
89
89
|
```Python
|
90
90
|
>>> import neurostats_API
|
91
91
|
>>> print(neurostats_API.__version__)
|
92
|
-
0.0.21
|
92
|
+
0.0.22
|
93
93
|
```
|
94
94
|
|
95
95
|
### 得到最新一期的評價資料與歷年評價
|
@@ -0,0 +1,178 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import json
|
3
|
+
import pandas as pd
|
4
|
+
from ..utils import StatsDateTime, StatsProcessor
|
5
|
+
import yaml
|
6
|
+
|
7
|
+
|
8
|
+
class BalanceSheetFetcher(StatsFetcher):
    """
    Fetch and format the balance sheet of a single ticker.

    Corresponds to iFa.ai -> Financial Analysis -> Balance Sheet.
    """

    def __init__(self, ticker, db_client):
        """
        Set up the fetcher and load the sub-table settings.

        Args:
            ticker: Ticker symbol, forwarded to ``StatsFetcher.__init__``.
            db_client: Database client, forwarded to ``StatsFetcher.__init__``.
        """
        super().__init__(ticker, db_client)
        self.table_settings = StatsProcessor.load_yaml("twse/balance_sheet.yaml")

        # Dispatch table keyed by ``self.collection_name`` (set by the base class).
        self.process_function_map = {
            "twse_stats": self.process_data_twse,
            "us_stats": self.process_data_us
        }

    def prepare_query(self):
        """
        Extend the base pipeline with the balance-sheet specific stages.

        Returns:
            list: The base pipeline followed by an ``$unwind`` of
            ``seasonal_data``, a ``$project`` keeping ticker, company name,
            year, season and balance sheet, and a ``$sort`` by year and
            season in descending order.
        """
        pipeline = super().prepare_query()

        pipeline = pipeline + [
            {
                "$unwind": "$seasonal_data"  # expand the seasonal_data array
            },
            {
                "$project": {
                    "_id": 0,
                    "ticker": 1,
                    "company_name": 1,
                    "year": "$seasonal_data.year",
                    "season": "$seasonal_data.season",
                    "balance_sheet": {
                        "$ifNull": ["$seasonal_data.balance_sheet", []]
                    }  # avoid null
                }
            },
            {
                "$sort": {
                    "year": -1,
                    "season": -1
                }
            }
        ]

        return pipeline

    def collect_data(self):
        """Run the aggregation via the base class and return its result."""
        return super().collect_data()

    def query_data(self):
        """
        Fetch the raw documents and process them for the current collection.

        Returns:
            dict: Output of ``process_data_twse`` or ``process_data_us``,
            selected by ``self.collection_name``.

        Raises:
            KeyError: If ``self.collection_name`` has no registered processor.
        """
        fetched_data = self.collect_data()

        process_fn = self.process_function_map[self.collection_name]
        return process_fn(fetched_data)

    def process_data_twse(self, fetched_data):
        """
        Build the balance-sheet tables for a ``twse_stats`` ticker.

        Args:
            fetched_data: List of documents with ``year``, ``season``,
                ``company_name`` and ``balance_sheet`` keys, as projected by
                ``prepare_query``.

        Returns:
            dict: ``ticker``, ``company_name``, one entry per name in
            ``self.table_settings`` (when slicing succeeds), plus
            ``balance_sheet`` (target-season columns, value/percentage
            expanded), ``balance_sheet_all`` (every quarter, flattened to
            ``<item>_value`` / ``<item>_percentage`` rows) and
            ``balance_sheet_YoY`` (target-season columns of the flattened
            table).
        """
        latest_time = StatsDateTime.get_latest_time(
            self.ticker, self.collection
        ).get('last_update_time', {})
        # Season of the latest available data; falls back to Q4 when unknown.
        target_season = latest_time.get('seasonal_data',
                                        {}).get('latest_season', 4)

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name']
        }

        # Collect the raw per-quarter statements, keyed by "<year>Q<season>".
        table_dict = {}
        for data in fetched_data:
            # The query substitutes [] for a missing statement; normalise any
            # empty placeholder to a dict so later dict operations work.
            table_dict[f"{data['year']}Q{data['season']}"] = (
                data['balance_sheet'] or {}
            )

        old_balance_sheet = pd.DataFrame(table_dict)
        target_season_col = old_balance_sheet.columns.str.endswith(
            f"Q{target_season}"
        )
        old_balance_sheet = old_balance_sheet.loc[:, target_season_col]
        old_balance_sheet = StatsProcessor.expand_value_percentage(
            old_balance_sheet
        )

        # flatten dict:
        # {<key>: {"value": <value>, "percentage": <value>}}
        # -> {<key>_value: <value>, <key>_percentage: <value>}
        # Each quarter must be flattened from its OWN statement; flattening a
        # leftover loop variable would copy one quarter into every column.
        flat_dict = {
            time_index: self.flatten_dict(
                data_dict,
                indexes=list(data_dict.keys()),
                target_keys=["value", "percentage"]
            )
            for time_index, data_dict in table_dict.items()
        }

        total_table = pd.DataFrame.from_dict(flat_dict)
        value_index = total_table.index.str.endswith("_value")
        total_table.loc[value_index, :] = (
            total_table.loc[value_index, :].apply(
                lambda x: StatsProcessor.cal_non_percentage(x, postfix="元"),
                axis=0
            )
        )

        percentage_index = total_table.index.str.endswith("_percentage")
        total_table.loc[percentage_index, :] = (
            total_table.loc[percentage_index, :].apply(
                lambda x: StatsProcessor.cal_non_percentage(
                    x, to_str=True, postfix="%"
                ),
                axis=0
            )
        )

        target_season_columns = total_table.columns.str.endswith(
            f"Q{target_season}"
        )
        total_table_YoY = total_table.loc[:, target_season_columns]

        # For every configured sub-table, use the first candidate index that
        # can be sliced out of the table; failures are reported and skipped.
        for name, setting in self.table_settings.items():
            target_indexes = setting.get('target_index', [None])
            for target_index in target_indexes:
                try:
                    return_dict[name] = StatsProcessor.slice_old_table(
                        total_table=old_balance_sheet,
                        target_index=target_index
                    )
                    break
                except Exception as e:
                    print(f"error : {str(e)}")
                    continue

        return_dict.update(
            {
                "balance_sheet": old_balance_sheet,
                "balance_sheet_all": total_table.copy(),
                "balance_sheet_YoY": total_table_YoY
            }
        )
        return return_dict

    def process_data_us(self, fetched_data):
        """
        Build the balance-sheet tables for a ``us_stats`` ticker.

        Args:
            fetched_data: List of documents with ``year``, ``season``,
                ``company_name`` and ``balance_sheet`` keys.

        Returns:
            dict: ``ticker``, ``company_name``, ``balance_sheet`` (one column
            per quarter) and ``balance_sheet_YoY`` (only the columns whose
            season matches the first fetched document).
        """
        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name']
        }

        table_dict = {
            f"{data['year']}Q{data['season']}": data['balance_sheet']
            for data in fetched_data
        }
        balance_sheet_df = pd.DataFrame.from_dict(table_dict)

        return_dict["balance_sheet"] = balance_sheet_df

        # The first document's season selects the year-over-year columns.
        latest_season = fetched_data[0]['season']
        target_season_columns = balance_sheet_df.columns.str.endswith(
            f"Q{latest_season}"
        )
        return_dict["balance_sheet_YoY"] = balance_sheet_df.loc[
            :, target_season_columns
        ]
        return return_dict
|
@@ -1,22 +1,31 @@
|
|
1
1
|
import abc
|
2
|
-
from
|
3
|
-
from pymongo import MongoClient
|
4
|
-
import pandas as pd
|
2
|
+
from datetime import datetime, timedelta, date
|
5
3
|
import json
|
4
|
+
import pandas as pd
|
5
|
+
from pymongo import MongoClient
|
6
6
|
import pytz
|
7
|
-
from
|
7
|
+
from typing import Union
|
8
8
|
from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
|
9
|
-
import yaml
|
10
9
|
|
11
10
|
|
12
|
-
class StatsFetcher:
|
11
|
+
class StatsFetcher(abc.ABC):
|
13
12
|
|
14
|
-
def __init__(self, ticker, db_client):
|
13
|
+
def __init__(self, ticker: str, db_client: MongoClient):
|
15
14
|
self.ticker = ticker
|
16
|
-
self.db = db_client["company"] # Replace with your database name
|
17
|
-
self.collection = self.db["twse_stats"]
|
18
|
-
|
19
15
|
self.timezone = pytz.timezone("Asia/Taipei")
|
16
|
+
self.tw_company_list = StatsProcessor.load_json("company_list/tw.json")
|
17
|
+
db_mapping = {
|
18
|
+
"company": "twse_stats",
|
19
|
+
"company_us": "us_stats",
|
20
|
+
}
|
21
|
+
|
22
|
+
name_mapping = {"company": "台股", "company_us": "美股"}
|
23
|
+
|
24
|
+
db_name = "company" if self.ticker in self.tw_company_list else "company_us"
|
25
|
+
self.db = db_client[db_name]
|
26
|
+
self.collection_name = db_mapping.get(db_name, "unknown")
|
27
|
+
assert self.collection_name != "unknown", f"請確認 {ticker} 是否是 {','.join(list(name_mapping.values()))}"
|
28
|
+
self.collection = db_client[db_name][self.collection_name]
|
20
29
|
|
21
30
|
self.target_metric_dict = {
|
22
31
|
'value': ['value'],
|
@@ -37,40 +46,41 @@ class StatsFetcher:
|
|
37
46
|
}
|
38
47
|
},
|
39
48
|
]
|
49
|
+
|
50
|
+
def query_data(self):
    """
    Fetch and process this fetcher's data; subclasses must override this.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    # The exception must be raised: returning the instance would hand callers
    # an exception object that looks like a successful result.
    raise NotImplementedError()
|
40
52
|
|
41
|
-
def collect_data(self
|
53
|
+
def collect_data(self):
    """
    Run the prepared aggregation pipeline against ``self.collection``.

    Returns:
        list | None: The aggregated documents, or ``None`` when the
        aggregation yields nothing.
    """
    documents = list(self.collection.aggregate(self.prepare_query()))
    if not documents:
        return None
    return documents
|
45
57
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
year, month, day = [int(num) for num in date_str.split("-")]
|
50
|
-
|
51
|
-
date = datetime.strptime(date_str, "%Y-%m-%d")
|
52
|
-
date = self.timezone.localize(date)
|
53
|
-
|
58
|
+
def str_to_datetime(self, date_str: str) -> StatsDateTime:
    """
    Parse a ``YYYY-MM-DD`` string into a ``StatsDateTime``.

    Args:
        date_str: Date text in ``%Y-%m-%d`` format.

    Returns:
        StatsDateTime: Built from the date localized with ``self.timezone``,
        its year, month and day, and the season (quarter, 1-4) derived from
        the month.
    """
    naive = datetime.strptime(date_str, "%Y-%m-%d")
    localized = self.timezone.localize(naive)
    # Months 1-3 -> season 1, 4-6 -> season 2, and so on.
    quarter = (localized.month - 1) // 3 + 1
    return StatsDateTime(
        localized, localized.year, localized.month, localized.day, quarter
    )
|
57
63
|
|
58
|
-
def has_required_columns(
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
Args:
|
63
|
-
df (pd.DataFrame): The DataFrame to check.
|
64
|
-
required_cols (list, optional): List of required column names.
|
65
|
-
Defaults to ['date', 'open', 'high', 'low', 'close', 'volume'].
|
66
|
-
|
67
|
-
Returns:
|
68
|
-
bool: True if all required columns are present, False otherwise.
|
69
|
-
"""
|
64
|
+
def has_required_columns(
    self, df: pd.DataFrame, required_cols=None
) -> bool:
    """
    Check that every required column is present in ``df``.

    Args:
        df: The DataFrame to check.
        required_cols: Column names to look for. Defaults to
            ``['date', 'open', 'high', 'low', 'close', 'volume']``.

    Returns:
        bool: True if all required columns are present, False otherwise.
    """
    wanted = (
        ['date', 'open', 'high', 'low', 'close', 'volume']
        if required_cols is None else required_cols
    )
    for column in wanted:
        if column not in df.columns:
            return False
    return True
|
70
|
+
|
71
|
+
@staticmethod
def flatten_dict(value_dict, indexes, target_keys):
    """
    Flatten ``{key: {sub_key: v}}`` into ``{"<key>_<sub_key>": v}``.

    Args:
        value_dict: Mapping whose values are dicts.
        indexes: Accepted for interface compatibility but not used; the keys
            of ``value_dict`` are always the ones flattened.
        target_keys: Sub-keys to extract from every entry; a sub-key missing
            from an entry yields ``None``.

    Returns:
        dict: One ``"<key>_<sub_key>"`` item per key / target sub-key pair.
    """
    flattened = {}
    for key, entry in value_dict.items():
        for sub_key in target_keys:
            flattened[f"{key}_{sub_key}"] = entry.get(sub_key, None)
    return flattened
|
74
84
|
|
75
85
|
|
76
86
|
class BaseTEJFetcher(abc.ABC):
|
@@ -81,21 +91,14 @@ class BaseTEJFetcher(abc.ABC):
|
|
81
91
|
|
82
92
|
def get_latest_data_time(self, ticker):
    """
    Look up the latest data date recorded for ``ticker``.

    Args:
        ticker: Ticker symbol matched against the ``ticker`` field.

    Returns:
        The value stored at ``last_update.latest_data_date``, or ``None``
        when no document matches or the field is absent.
    """
    latest_data = self.collection.find_one(
        {"ticker": ticker}, {
            "last_update": 1,
            "_id": 0
        }
    )

    # find_one yields None when nothing matches; report "no date" rather than
    # failing with AttributeError on the lookup below.
    if not latest_data:
        return None

    # Returns the latest date, or None.
    return (latest_data.get('last_update') or {}).get("latest_data_date", None)
|
99
102
|
|
100
103
|
def process_value(self, value):
|
101
104
|
if isinstance(value, str) and "%" in value:
|
@@ -107,77 +110,89 @@ class BaseTEJFetcher(abc.ABC):
|
|
107
110
|
|
108
111
|
def calculate_growth(self, this_value, last_value, delta):
    """
    Return the growth between two values, scaled by 100.

    Args:
        this_value: Current value.
        last_value: Earlier value to compare against.
        delta: Passed through to ``YoY_Calculator.cal_growth``.

    Returns:
        ``YoY_Calculator.cal_growth(...) * 100``, or ``None`` if the
        computation raises any exception.
    """
    try:
        ratio = YoY_Calculator.cal_growth(this_value, last_value, delta)
        return ratio * 100
    except Exception:
        return None
|
113
118
|
|
114
|
-
def cal_YoY(
|
119
|
+
def cal_YoY(
    self, data_dict: dict, start_year: int, end_year: int, season: int
):
    """
    Attach 1/3/5/10-year growth figures to every item of a given season.

    Args:
        data_dict: Mapping of ``"<year>Q<season>"`` to ``{item: value}``.
        start_year: First year to process (inclusive).
        end_year: Last year to process (inclusive).
        season: Season (quarter) whose entries are compared across years.

    Returns:
        dict: For each processed ``"<year>Q<season>"``, a copy of that
        period's data where every item accepted by ``self.process_value``
        becomes ``{"value": raw, "YoY_1": ..., "YoY_3": ..., "YoY_5": ...,
        "YoY_10": ...}``. Growth is formatted as ``"x.xx%"``, or ``None``
        when it cannot be computed. Items for which ``process_value``
        returns ``None`` are dropped; the ``"season"`` key is kept as is.
        Periods that are missing or empty are skipped.
    """
    year_shifts = [1, 3, 5, 10]
    return_dict = {}

    for year in range(start_year, end_year + 1):
        # Work on a shallow copy so the caller's period dict is not modified.
        year_data = data_dict.get(f"{year}Q{season}", {}).copy()
        if not year_data:
            continue

        for key, value in list(year_data.items()):
            if key == "season":
                continue

            this_value = self.process_value(value)
            if this_value is None:
                year_data.pop(key)
                continue

            temp_dict = {"value": value}
            for shift in year_shifts:
                past_value = self.process_value(
                    data_dict.get(f"{year - shift}Q{season}", {}).get(key)
                )
                # A missing or zero past value gives no meaningful growth.
                growth = (
                    self.calculate_growth(this_value, past_value, shift)
                    if past_value else None
                )

                # Compare against None so a genuine 0.0 growth is reported
                # as "0.00%" instead of being treated as missing.
                temp_dict[f"YoY_{shift}"] = (
                    f"{growth:.2f}%" if growth is not None else None
                )

            year_data[key] = temp_dict
        return_dict[f"{year}Q{season}"] = year_data
    return return_dict
|
145
153
|
|
146
154
|
def cal_QoQ(self, data_dict):
    """
    Attach quarter-over-quarter growth to every item of every period.

    Args:
        data_dict: Mapping of ``"<year>Q<season>"`` to ``{item: value}``.
            The inner dicts are modified in place.

    Returns:
        dict: The same periods, where every item accepted by
        ``self.process_value`` becomes ``{"value": raw, "growth": ...}``.
        Growth is formatted as ``"x.xx%"``, or ``None`` when the previous
        quarter has no usable value. Items for which ``process_value``
        returns ``None`` are removed; the ``"season"`` key is kept as is.
    """
    return_dict = {}

    for time_index, this_data in data_dict.items():
        year, season = map(int, time_index.split("Q"))
        # The quarter before Q1 is Q4 of the previous year.
        last_year, last_season = (
            year - 1, 4
        ) if season == 1 else (year, season - 1)

        for key in list(this_data.keys()):
            if key == "season":
                continue

            this_value = self.process_value(this_data[key])
            if this_value is None:
                this_data.pop(key)
                continue

            temp_dict = {"value": this_data[key]}

            # The previous quarter's entry is already a {"value", "growth"}
            # dict if that period was processed earlier in this loop, and
            # still a raw value otherwise; accept both shapes.
            last_entry = data_dict.get(
                f"{last_year}Q{last_season}", {}
            ).get(key)
            if isinstance(last_entry, dict):
                last_value = last_entry.get('value')
            else:
                last_value = last_entry

            last_value = self.process_value(last_value)
            growth = self.calculate_growth(
                this_value, last_value, 1
            ) if last_value is not None else None
            # Compare against None so a genuine 0.0 growth is reported as
            # "0.00%" instead of being treated as missing.
            temp_dict['growth'] = (
                f"{growth:.2f}%" if growth is not None else None
            )

            this_data[key] = temp_dict

        return_dict[time_index] = this_data

    return return_dict
|
173
188
|
|
174
189
|
def get_dict_of_df(self, data_dict):
    """
    dict[dict] -> dict[df]

    Convert every value of ``data_dict`` into a DataFrame via
    ``pd.DataFrame.from_dict``, keeping the keys and their order.
    """
    frames = {}
    for name, table in data_dict.items():
        frames[name] = pd.DataFrame.from_dict(table)
    return frames
|
181
196
|
|
182
197
|
def set_time_shift(self, date: Union[str, datetime], period: str):
|
183
198
|
if isinstance(date, str):
|
@@ -197,4 +212,4 @@ class BaseTEJFetcher(abc.ABC):
|
|
197
212
|
if period == "all":
|
198
213
|
return datetime.strptime("1991-01-01", "%Y-%m-%d")
|
199
214
|
|
200
|
-
return date - period_mapping.get(period, timedelta(days=0))
|
215
|
+
return date - period_mapping.get(period, timedelta(days=0)) # 預設為不變"
|
@@ -0,0 +1,200 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import json
|
3
|
+
import numpy as np
|
4
|
+
import pandas as pd
|
5
|
+
from ..utils import StatsDateTime, StatsProcessor
|
6
|
+
import importlib.resources as pkg_resources
|
7
|
+
import yaml
|
8
|
+
|
9
|
+
class CashFlowFetcher(StatsFetcher):
    """Fetch and format the cash-flow statement of a single ticker."""

    def __init__(self, ticker, db_client):
        """
        Set up the fetcher and load the cash-flow percentage settings.

        Args:
            ticker: Ticker symbol, forwarded to ``StatsFetcher.__init__``.
            db_client: Database client, forwarded to ``StatsFetcher.__init__``.
        """
        super().__init__(ticker, db_client)

        self.cash_flow_dict = StatsProcessor.load_yaml(
            "twse/cash_flow_percentage.yaml"
        )  # used for computing the sub-tables

        # Dispatch table keyed by ``self.collection_name`` (set by the base class).
        self.process_function_map = {
            "twse_stats": self.process_data_twse,
            "us_stats": self.process_data_us
        }

    def prepare_query(self):
        """
        Extend the base pipeline with the cash-flow specific stages.

        Returns:
            list: The base pipeline followed by an ``$unwind`` of
            ``seasonal_data``, a ``$project`` keeping ticker, company name,
            year, season and cash flow, and a ``$sort`` by year and season
            in descending order.
        """
        pipeline = super().prepare_query()

        # Name of the statement field inside seasonal_data, per collection.
        name_map = {
            "twse_stats": "cash_flow",
            "us_stats": "cash_flow"
        }

        chart_name = name_map.get(self.collection_name, "cash_flow")

        append_pipeline = [
            {
                "$unwind": "$seasonal_data"  # expand the seasonal_data array
            },
            {
                "$project": {
                    "_id": 0,
                    "ticker": 1,
                    "company_name": 1,
                    "year": "$seasonal_data.year",
                    "season": "$seasonal_data.season",
                    "cash_flow": {
                        "$ifNull": [f"$seasonal_data.{chart_name}", []]
                    }  # avoid null
                }
            },
            {
                "$sort": {
                    "year": -1,
                    "season": -1
                }
            }
        ]

        pipeline = pipeline + append_pipeline

        return pipeline

    def collect_data(self):
        """Run the aggregation via the base class and return its result."""
        return super().collect_data()

    def query_data(self):
        """
        Fetch the raw documents and process them for the current collection.

        Returns:
            dict: Output of the processor registered for
            ``self.collection_name``; ``process_data_us`` is used when the
            collection name is not registered.
        """
        fetched_data = self.collect_data()

        process_fn = self.process_function_map.get(
            self.collection_name, self.process_data_us
        )
        return process_fn(fetched_data)

    def process_data_twse(self, fetched_data):
        """
        Process all tables of the cash-flow page.

        The cash-flow statement itself carries no ratios, but iFa computes
        them, and items belong to different activities (operating,
        investing, financing), so slicing is not used here.

        Args:
            fetched_data: List of documents with ``year``, ``season``,
                ``company_name`` and ``cash_flow`` keys, as projected by
                ``prepare_query``.

        Returns:
            dict: ``ticker``, ``company_name``, ``cash_flow`` (all items),
            ``CASHO`` / ``CASHI`` / ``CASHF`` (the three per-activity
            sections, in checkpoint order), ``cash_flow_all`` (flattened
            ``<item>_value`` / ``<item>_percentage`` rows for every quarter)
            and ``cash_flow_YoY`` (columns of ``cash_flow_all`` whose season
            matches the first fetched document).
        """
        index_names = []

        table_dict = dict()
        CASHO_dict = dict()
        CASHI_dict = dict()
        CASHF_dict = dict()

        # Compute the cash-flow ratios.
        checkpoints = ["營業活動之現金流量-間接法", "投資活動之現金流量", "籌資活動之現金流量", "匯率變動對現金及約當現金之影響"]
        main_cash_flows = [
            "營業活動之淨現金流入(流出)", "投資活動之淨現金流入(流出)", "籌資活動之淨現金流入(流出)", None
        ]  # the main items the ratios are computed against
        partial_cash_flows = [CASHO_dict, CASHI_dict, CASHF_dict, dict()]

        # Approach: the checkpoint names also appear as keys in the
        # statement; when one shows up, switch to the next section.

        for data in fetched_data:
            year, season = data['year'], data['season']
            # The query substitutes [] for a missing statement; normalise any
            # empty placeholder to a dict so ``.items()`` works.
            cash_flow = data['cash_flow'] or {}

            time_index = f"{year}Q{season}"

            main_cash_flow_name = None
            partial_cash_flow = None
            next_checkpoint = 0

            temp_dict = {}

            for index_name, cash_flow_value in cash_flow.items():
                # NOTE(review): ``checkpoints`` has four entries but this
                # guard only advances through the first three, so the fourth
                # is never matched and trailing rows stay in the third
                # section -- confirm whether that is intended.
                if (next_checkpoint < 3
                        and index_name == checkpoints[next_checkpoint]):
                    # Reached the start of the next section.
                    main_cash_flow_name = main_cash_flows[next_checkpoint]
                    partial_cash_flow = partial_cash_flows[next_checkpoint]
                    partial_cash_flow[time_index] = {}
                    next_checkpoint += 1

                if isinstance(cash_flow_value, dict):
                    value = cash_flow_value.get('value', None)
                else:
                    value = cash_flow_value

                main_value = cash_flow.get(main_cash_flow_name, None)
                if isinstance(main_value, dict):
                    main_value = main_value.get('value', None)

                # The ratio is undefined when either operand is missing,
                # non-numeric, or the denominator is zero.
                try:
                    ratio = np.round(
                        (value / main_value) * 100, 2
                    )
                    ratio = f"{ratio}%"
                except Exception:
                    ratio = None

                value = StatsProcessor.cal_non_percentage(value, postfix="千元")
                temp_dict[index_name] = {
                    "value": value,
                    "percentage": ratio
                }

                # Rows appearing before the first checkpoint belong to no
                # section; they are only kept in the overall table.
                if partial_cash_flow is not None:
                    partial_cash_flow[time_index][index_name] = temp_dict[index_name]

            table_dict[time_index] = temp_dict
            index_names += list(cash_flow.keys())

        # De-duplicate the item names while preserving their order.
        index_names = list(dict.fromkeys(index_names))

        cash_flow_table = pd.DataFrame(table_dict)
        cash_flow_table_stats = StatsProcessor.expand_value_percentage(cash_flow_table)

        CASHO_table = pd.DataFrame(CASHO_dict)
        CASHO_table = StatsProcessor.expand_value_percentage(CASHO_table)

        CASHI_table = pd.DataFrame(CASHI_dict)
        CASHI_table = StatsProcessor.expand_value_percentage(CASHI_table)

        CASHF_table = pd.DataFrame(CASHF_dict)
        CASHF_table = StatsProcessor.expand_value_percentage(CASHF_table)

        flat_dict = {
            time_index: self.flatten_dict(
                period_dict, index_names, target_keys=['value', 'percentage']
            )
            for time_index, period_dict in table_dict.items()
        }
        cash_flow_flatten = pd.DataFrame.from_dict(flat_dict)

        # The first document's season selects the year-over-year columns.
        target_season = fetched_data[0]['season']
        target_season_column = cash_flow_flatten.columns.str.endswith(f"Q{target_season}")

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name'],
            "cash_flow": cash_flow_table_stats,
            "CASHO": CASHO_table,
            "CASHI": CASHI_table,
            "CASHF": CASHF_table,
            "cash_flow_all": cash_flow_flatten,
            "cash_flow_YoY": cash_flow_flatten.loc[:, target_season_column]
        }
        return return_dict

    def process_data_us(self, fetched_data):
        """
        Build the cash-flow tables for a ``us_stats`` ticker.

        Args:
            fetched_data: List of documents with ``year``, ``season``,
                ``company_name`` and ``cash_flow`` keys.

        Returns:
            dict: ``ticker``, ``company_name``, ``cash_flow`` (one column per
            quarter) and ``cash_flow_YoY`` (only the columns whose season
            matches the first fetched document).
        """
        table_dict = {
            f"{data['year']}Q{data['season']}": data['cash_flow']
            for data in fetched_data
        }

        cash_flow_df = pd.DataFrame.from_dict(table_dict)

        latest_season = fetched_data[0]['season']
        target_season_columns = cash_flow_df.columns.str.endswith(
            f"Q{latest_season}"
        )
        cash_flow_df_YoY = cash_flow_df.loc[:, target_season_columns]

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name'],
            "cash_flow": cash_flow_df,
            "cash_flow_YoY": cash_flow_df_YoY
        }
        return return_dict
|