neurostats-API 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- neurostats_API/__init__.py +1 -1
- neurostats_API/fetchers/__init__.py +4 -0
- neurostats_API/fetchers/balance_sheet.py +135 -0
- neurostats_API/fetchers/cash_flow.py +184 -0
- neurostats_API/fetchers/finance_overview.py +268 -119
- neurostats_API/fetchers/month_revenue.py +92 -0
- neurostats_API/fetchers/profit_lose.py +141 -0
- neurostats_API/tools/balance_sheet.yaml +26 -0
- neurostats_API/tools/cash_flow_percentage.yaml +39 -0
- neurostats_API/tools/finance_overview_dict.yaml +15 -8
- neurostats_API/tools/profit_lose.yaml +1 -1
- neurostats_API/tools/seasonal_data_field_dict.txt +1 -0
- neurostats_API/utils/data_process.py +149 -3
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/METADATA +139 -190
- neurostats_API-0.0.8.dist-info/RECORD +26 -0
- neurostats_API-0.0.6.dist-info/RECORD +0 -23
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/WHEEL +0 -0
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import json
|
3
|
+
import pandas as pd
|
4
|
+
from ..utils import StatsDateTime, StatsProcessor
|
5
|
+
import importlib.resources as pkg_resources
|
6
|
+
import yaml
|
7
|
+
|
8
|
+
|
9
|
+
class MonthRevenueFetcher(StatsFetcher):
    """
    iFa.ai: 財務分析 -> 每月營收 (monthly revenue page).

    Builds a MongoDB aggregation that returns the ticker's ``monthly_data``
    sorted newest-first, then pivots it into the page's report DataFrames.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

    def prepare_query(self, target_year, target_month):
        """Return the aggregation pipeline for this ticker.

        NOTE(review): ``target_year`` / ``target_month`` are currently
        unused — the pipeline always returns every month, sorted
        newest-first.  Kept for interface compatibility with callers.
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                # Sort the embedded array newest-first so that
                # monthly_data[0] is the latest reported month.
                "monthly_data": {
                    "$sortArray": {
                        "input": "$monthly_data",
                        "sortBy": {
                            "year": -1,
                            "month": -1
                        }
                    }
                },
            }
        })

        return pipeline

    def collect_data(self, target_year, target_month):
        """Run the pipeline and return the last matching document.

        Raises:
            ValueError: when the aggregation returns no document
                (previously surfaced as an opaque IndexError).
        """
        pipeline = self.prepare_query(target_year, target_month)

        fetched_data = list(self.collection.aggregate(pipeline))

        if not fetched_data:
            raise ValueError("No monthly revenue document found for ticker")

        return fetched_data[-1]

    def query_data(self):
        """Fetch and post-process the monthly revenue tables as of today."""
        today = StatsDateTime.get_today()
        target_month = today.month
        target_year = today.year

        # Query data
        fetched_data = self.collect_data(target_year, target_month)

        return self.process_data(fetched_data)

    def process_data(self, fetched_data):
        """Pivot the raw ``monthly_data`` list into report DataFrames.

        Adds to ``fetched_data``:
            month_revenue: month x year revenue table, with a
                ``grand_total`` row on top and columns sorted newest-first.
            this_month_revenue_over_years: the latest month's revenue and
                growth ratios across years.  # 歷年月營收
            grand_total_over_years: cumulative revenue and growth ratios
                across years.  # 歷年營收成長量
        Removes the raw ``monthly_data`` entry before returning.
        """
        monthly_data = fetched_data['monthly_data']
        # monthly_data arrives sorted newest-first (see prepare_query),
        # so index 0 is the most recent month on record.
        target_month = monthly_data[0]['month']
        monthly_df = pd.DataFrame(monthly_data)
        target_month_df = monthly_df[monthly_df['month'] == target_month]
        month_revenue_df = monthly_df.pivot(index='month',
                                            columns='year',
                                            values='revenue')

        grand_total_df = target_month_df.pivot(index='month',
                                               columns='year',
                                               values='grand_total')

        # Plain string label (was an f-string with no placeholders).
        grand_total_df.rename(index={target_month: "grand_total"},
                              inplace=True)
        month_revenue_df = month_revenue_df.sort_index(ascending=False)
        month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
                                     axis=0)

        fetched_data['month_revenue'] = month_revenue_df[sorted(
            month_revenue_df.columns, reverse=True)]
        # 歷年月營收 (monthly revenue across years)
        fetched_data[
            'this_month_revenue_over_years'] = target_month_df.set_index(
                "year")[["revenue", "revenue_increment_ratio", "YoY_1",
                         "YoY_3", "YoY_5", "YoY_10"]].T
        # 歷年營收成長量 (cumulative revenue growth across years)
        fetched_data['grand_total_over_years'] = target_month_df.set_index(
            "year")[["grand_total", "grand_total_increment_ratio",
                     "grand_total_YoY_1", "grand_total_YoY_3",
                     "grand_total_YoY_5", "grand_total_YoY_10"]].T

        fetched_data.pop("monthly_data")

        return fetched_data
@@ -0,0 +1,141 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import importlib.resources as pkg_resources
|
3
|
+
import json
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
from ..utils import StatsDateTime, StatsProcessor
|
7
|
+
import yaml
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
class ProfitLoseFetcher(StatsFetcher):
    """
    iFa.ai: 財務分析 -> 損益表 (income statement page).

    Fetches one season of ``profit_lose`` data per year via a MongoDB
    aggregation, then slices the flattened table into the per-page
    tables configured in tools/profit_lose.yaml.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

        # Per-table slicing settings (mode / target_index),
        # consumed in process_data().
        self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")

    def prepare_query(self, target_season):
        """Return a pipeline selecting ``target_season`` of every year,
        sorted newest year first, keeping only year/season/profit_lose.

        (A dead ``target_query`` dict with a malformed
        ``$$$$target_season_data.balance_sheet`` expression was removed —
        it was an unused copy-paste leftover from the balance-sheet
        fetcher.)
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                "profit_loses": {
                    "$sortArray": {
                        "input": {
                            "$map": {
                                # Keep only entries of the requested season.
                                "input": {
                                    "$filter": {
                                        "input": "$seasonal_data",
                                        "as": "season",
                                        "cond": {
                                            "$eq":
                                            ["$$season.season", target_season]
                                        }
                                    }
                                },
                                "as": "target_season_data",
                                "in": {
                                    "year":
                                    "$$target_season_data.year",
                                    "season":
                                    "$$target_season_data.season",
                                    "profit_lose":
                                    "$$target_season_data.profit_lose"
                                }
                            }
                        },
                        "sortBy": {
                            "year": -1  # newest year first
                        }
                    }
                }
            }
        })

        return pipeline

    def collect_data(self, target_season):
        """Run the pipeline and return the last matching document."""
        pipeline = self.prepare_query(target_season)

        fetched_data = self.collection.aggregate(pipeline)

        return list(fetched_data)[-1]

    def query_data(self):
        """Fetch the most recently *completed* season's income statement."""
        today = StatsDateTime.get_today()

        # The current season's report is not published yet, so look one
        # season back (Q1 wraps to the previous year's Q4).
        target_season = today.season
        target_season = target_season - 1 if target_season > 1 else 4

        fetched_data = self.collect_data(target_season)

        return self.process_data(fetched_data, target_season)

    def process_data(self, fetched_data, target_season):
        """Flatten yearly profit/lose dicts into one MultiIndex-column
        table and slice it per the settings file.

        Returns:
            dict with ``ticker``, ``company_name`` and one sliced
            DataFrame per entry of profit_lose.yaml.
        """
        profit_loses = fetched_data['profit_loses']

        # {row label: {(time_index, metric name): value}}
        table_dict = dict()

        return_dict = {
            "ticker": fetched_data['ticker'],
            "company_name": fetched_data['company_name'],
        }

        for data in profit_loses:
            year = data['year']

            time_index = f"{year}Q{target_season}"

            profit_lose = data['profit_lose']

            for index_name, value_dict in profit_lose.items():
                # (2020Q1, 項目, 金額或%) — one cell per (time, metric)
                for item_name, item in value_dict.items():
                    if (item_name == 'percentage'):
                        if (isinstance(item, (float, int))):
                            item = np.round(item, 2)
                    if ('YoY' in item_name):
                        # Stored as a fraction; report as a percentage.
                        if (isinstance(item, (float, int))):
                            item = np.round(item * 100, 2)
                    # setdefault replaces the previous try/except-KeyError
                    # insert; unused index_names/grand_total_dict removed.
                    table_dict.setdefault(index_name,
                                          dict())[(time_index,
                                                   item_name)] = item

        total_table = pd.DataFrame.from_dict(table_dict, orient='index')
        total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)

        total_table = total_table.replace("N/A", None)

        for name, setting in self.table_settings.items():
            return_dict[name] = StatsProcessor.slice_multi_col_table(
                total_table=total_table,
                mode=setting['mode'],
                target_index=setting['target_index']
                if "target_index" in setting.keys() else None)

        return return_dict
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Table-slicing settings for the balance-sheet page.  Same schema as
# profit_lose.yaml: each top-level key names one output table; `mode`
# selects the metric columns (keys of target_metric_dict in
# utils/data_process.py) and `target_index` is a space-separated list of
# row labels to keep.  Omitting `target_index` keeps every row.

# Full statement, all rows.
balance_sheet:
  mode: value_and_percentage

# Totals: total assets / total liabilities / total equity.
total_asset:
  mode: value_and_percentage
  target_index: 資產總額 負債總額 權益總額

# Current assets subtotal.
current_asset:
  mode: value_and_percentage
  target_index: 流動資產合計

# Non-current assets subtotal.
non_current_asset:
  mode: value_and_percentage
  target_index: 非流動資產合計

# Current liabilities subtotal.
current_debt:
  mode: value_and_percentage
  target_index: 流動負債合計

# Non-current liabilities subtotal.
non_current_debt:
  mode: value_and_percentage
  target_index: 非流動負債合計

# Total equity.
equity:
  mode: value_and_percentage
  target_index: 權益總額
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# 注意此並非用於slicing
# NOTE: not used for table slicing — groups the cash-flow statement rows
# of each section under that section's main index row.
# Sections: CASHO = operating, CASHI = investing, CASHF = financing.

# Operating activities (營業活動).
CASHO:
  main_index: 營業活動之淨現金流入(流出)
  index:
    - 繼續營業單位稅前淨利(淨損)
    - 收益費損項目合計
    - 折舊費用
    - 攤銷費用
    - 與營業活動相關之資產及負債之淨變動合計
    - 營業活動之淨現金流入(流出)

# Investing activities (投資活動).
CASHI:
  main_index: 投資活動之淨現金流入(流出)
  index:
    - 投資活動之淨現金流入(流出)
    - 取得不動產、廠房及設備
    - 處分不動產、廠房及設備
    - 取得無形資產
    - 處分無形資產
    - 取得透過損益按公允價值衡量之金融資產
    - 處分透過損益按公允價值衡量之金融資產
    - 取得透過其他綜合損益按公允價值衡量之金融資產
    - 處分透過其他綜合損益按公允價值衡量之金融資產
    - 取得按攤銷後成本衡量之金融資產
    - 處分按攤銷後成本衡量之金融資產
    - 按攤銷後成本衡量之金融資產到期還本

# Financing activities (籌資活動).
# BUG FIX: this key was previously a duplicate `CASHO`; yaml.safe_load
# keeps only the last occurrence of a duplicated key, which silently
# dropped the operating-activities section above.
CASHF:
  main_index: 籌資活動之淨現金流入(流出)
  index:
    - 籌資活動之淨現金流入(流出)
    - 短期借款增加
    - 短期借款減少
    - 發行公司債
    - 償還公司債
    - 舉借長期借款
    - 償還長期借款
    - 發放現金股利
    - 庫藏股票買回成本
|
@@ -15,22 +15,22 @@ net_income:
|
|
15
15
|
field: 本期淨利(淨損)
|
16
16
|
value: value
|
17
17
|
|
18
|
+
tax_fee:
|
19
|
+
field: 所得稅費用(利益)合計
|
20
|
+
value: value
|
21
|
+
|
18
22
|
# TODO: 以下所爬到的資料都是累計的,Ifa有額外計算當季的變化量
|
19
23
|
operating_cash_flow:
|
20
24
|
field: 營業活動之淨現金流入(流出)
|
21
|
-
value:
|
25
|
+
value: single_season_value
|
22
26
|
|
23
27
|
invest_cash_flow:
|
24
28
|
field: 投資活動之淨現金流入(流出)
|
25
|
-
value:
|
29
|
+
value: single_season_value
|
26
30
|
|
27
31
|
financing_cash_flow:
|
28
32
|
field: 籌資活動之淨現金流入(流出)
|
29
|
-
value:
|
30
|
-
|
31
|
-
fcf:
|
32
|
-
field: 本期現金及約當現金增加(減少)數
|
33
|
-
value: value
|
33
|
+
value: single_season_value
|
34
34
|
# ^^^ 以上皆需要額外在DataBase處理
|
35
35
|
|
36
36
|
# 每股財務狀況
|
@@ -84,7 +84,7 @@ net_income_YoY:
|
|
84
84
|
|
85
85
|
operating_cash_flow_YoY:
|
86
86
|
field: 營業活動之淨現金流入(流出)
|
87
|
-
value:
|
87
|
+
value: single_season_YoY
|
88
88
|
|
89
89
|
# operating_cash_flow_per_share_YoY:
|
90
90
|
# field: 每股營業現金流年成長率
|
@@ -123,6 +123,13 @@ total_liabilities:
|
|
123
123
|
field: 負債總額
|
124
124
|
value: value
|
125
125
|
|
126
|
+
short_term_liabilities:
|
127
|
+
field: 短期借款
|
128
|
+
value: value
|
129
|
+
|
130
|
+
long_term_liabilities:
|
131
|
+
field: 長期借款
|
132
|
+
value: value
|
126
133
|
#
|
127
134
|
cash_and_cash_equivalents:
|
128
135
|
field: 現金及約當現金
|
@@ -1,18 +1,164 @@
|
|
1
1
|
from importlib.resources import files
|
2
2
|
import json
|
3
|
+
import numpy as np
|
4
|
+
import pandas as pd
|
3
5
|
import yaml
|
4
6
|
|
7
|
+
# Maps a slicing `mode` name to the metric column names it selects.
# Consumed by StatsProcessor.slice_table / slice_multi_col_table, which
# receive the mode from the `mode` fields of the tools/*.yaml settings.
target_metric_dict = {
    'value': ['value'],
    'value_and_percentage': ['value', 'percentage'],
    'percentage': ['percentage'],
    'grand_total': ['grand_total'],
    'grand_total_values': ['grand_total', 'grand_total_percentage'],
    'grand_total_percentage': ['grand_total_percentage'],
    # Year-over-year growth horizons: 1, 3, 5 and 10 years back.
    'growth': [f'YoY_{i}' for i in [1, 3, 5, 10]],
    'grand_total_growth': [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
}
|
17
|
+
|
18
|
+
|
5
19
|
class StatsProcessor:
    """Stateless helpers: load resource files bundled with the package
    and slice the big metric tables into per-page report tables."""

    @classmethod
    def load_txt(cls, filename, json_load=True):
        """Read a file bundled in ``neurostats_API.tools``.

        Parses it as JSON unless ``json_load`` is False (then the raw
        text is returned).
        """
        txt_path = files('neurostats_API.tools').joinpath(filename)
        with open(txt_path, 'r', encoding='utf-8') as f:
            data = json.load(f) if (json_load) else f.read()
        return data

    @classmethod
    def load_yaml(cls, filename):
        """Read and parse a YAML file bundled in ``neurostats_API.tools``."""
        yaml_path = files('neurostats_API.tools').joinpath(filename)
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        return data

    @classmethod
    def expand_value_percentage(cls, dataframe):
        """Expand dict-valued cells column-wise.

        Each column of dicts becomes ``<col>_<key>`` scalar columns
        (e.g. ``<col>_value`` / ``<col>_percentage``), preserving the
        original index.
        """
        expanded_columns = {}
        for col in dataframe.columns:
            # Use json_normalize to split 'value' and 'percentage'
            expanded_df = pd.json_normalize(
                dataframe[col]).add_prefix(f"{col}_")
            expanded_df.index = dataframe.index
            # Append the expanded columns to the new DataFrame
            expanded_columns[col] = expanded_df

        expanded_df = pd.concat(expanded_columns.values(), axis=1)

        return expanded_df

    @classmethod
    def slice_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: space-separated row labels to keep
    ):
        """
        total_table: single-level columns named ``<time>_<metric>``.
        Keep only the metric columns selected by ``mode`` (see
        target_metric_dict), optionally restricted to the rows named in
        ``target_index``.

        On an unknown ``mode`` this returns an error-message *string*
        rather than raising, matching the historical behavior.
        """
        times = [
            column.split("_")[0] for column in total_table.columns.unique()
        ]  # extract the time prefix of every column
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [
            f"{time}_{value_name}" for time in times
            for value_name in target_metrics
        ]

        if (target_index):
            # FIX: the original transposed the slice and then transposed
            # it back on return — a no-op; return the slice directly.
            return total_table.loc[target_index.split(), desired_order]
        else:
            return total_table.loc[:, desired_order]

    @classmethod
    def slice_multi_col_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: space-separated row labels to keep
    ):
        """
        Slice a table with (time, metric) MultiIndex columns down to the
        metrics selected by ``mode``, optionally restricted to the rows
        named in ``target_index``.

        On an unknown ``mode`` this returns an error-message *string*
        rather than raising, matching the historical behavior.
        """
        times = total_table.columns.get_level_values(0).unique()
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [(time, value_name) for time in times
                         for value_name in target_metrics]

        if (target_index):
            target_index = target_index.split()
            sliced_table = total_table.loc[
                target_index, pd.IndexSlice[:,
                                            target_metrics]][desired_order].T
            if (mode == 'value_and_percentage'):  # balance-sheet page format
                return_table = sliced_table.T
                return_table.columns = [
                    "_".join(flatten_indexs)
                    for flatten_indexs in return_table.columns.to_flat_index()
                ]
                return return_table

            # Otherwise pivot to metrics-as-rows, newest time first.
            sliced_table = sliced_table.reset_index()
            sliced_table = sliced_table.pivot(index='level_1',
                                              columns='level_0',
                                              values=target_index).sort_index(
                                                  axis=1,
                                                  level=1,
                                                  ascending=False)

            sliced_table.columns = sliced_table.columns.get_level_values(1)
            sliced_table.columns.name = None
            sliced_table.index.name = None

            return sliced_table.reindex(target_metrics)

        else:
            return_table = total_table.loc[:, pd.IndexSlice[:,
                                                            target_metrics]][
                                                                desired_order]
            return_table.columns = [
                "_".join(flatten_indexs)
                for flatten_indexs in return_table.columns.to_flat_index()
            ]
            return return_table

    @classmethod
    def cal_percentage(cls, value, postfix="%"):
        """Format a fractional ratio as a percentage string
        (0.1234 -> "12.34%"); non-numeric values pass through unchanged."""
        if (isinstance(value, (float, int))):
            value = np.round(value * 100, 2).item()
            return f"{value:.2f}{postfix}"

        return value

    @classmethod
    def cal_non_percentage(cls, value, to_str=False, postfix="元"):
        """Round a numeric value to 2 decimals; with ``to_str`` format it
        as ``<value:.2f><postfix>``.  Non-numeric values pass through
        unchanged."""
        if (isinstance(value, (float, int))):
            value = np.round(value, 2).item()
            if (to_str):
                return f"{value:.2f}{postfix}"

            return value

        return value
|