neurostats-API 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurostats_API/__init__.py +1 -1
- neurostats_API/fetchers/__init__.py +4 -0
- neurostats_API/fetchers/balance_sheet.py +135 -0
- neurostats_API/fetchers/cash_flow.py +184 -0
- neurostats_API/fetchers/finance_overview.py +268 -119
- neurostats_API/fetchers/month_revenue.py +92 -0
- neurostats_API/fetchers/profit_lose.py +141 -0
- neurostats_API/tools/balance_sheet.yaml +26 -0
- neurostats_API/tools/cash_flow_percentage.yaml +39 -0
- neurostats_API/tools/finance_overview_dict.yaml +15 -8
- neurostats_API/tools/profit_lose.yaml +1 -1
- neurostats_API/tools/seasonal_data_field_dict.txt +1 -0
- neurostats_API/utils/data_process.py +149 -3
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/METADATA +139 -190
- neurostats_API-0.0.8.dist-info/RECORD +26 -0
- neurostats_API-0.0.6.dist-info/RECORD +0 -23
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/WHEEL +0 -0
- {neurostats_API-0.0.6.dist-info → neurostats_API-0.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import json
|
3
|
+
import pandas as pd
|
4
|
+
from ..utils import StatsDateTime, StatsProcessor
|
5
|
+
import importlib.resources as pkg_resources
|
6
|
+
import yaml
|
7
|
+
|
8
|
+
|
9
|
+
class MonthRevenueFetcher(StatsFetcher):
    """
    iFa.ai: 財務分析 -> 每月營收 (monthly revenue fetcher).

    Aggregates one ticker's ``monthly_data`` documents and reshapes them
    into pivot tables: month-by-year revenue, grand totals, and per-year
    YoY series for the most recent month.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

    def prepare_query(self, target_year, target_month):
        """Build the aggregation pipeline.

        NOTE(review): ``target_year``/``target_month`` are currently
        unused — the pipeline returns *all* monthly data sorted by
        (year, month) descending; filtering happens in process_data.
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                "monthly_data": {
                    "$sortArray": {
                        "input": "$monthly_data",
                        "sortBy": {
                            "year": -1,
                            "month": -1
                        }
                    }
                },
            }
        })

        return pipeline

    def collect_data(self, target_year, target_month):
        """Run the pipeline and return the last aggregated document.

        Raises IndexError when the aggregation yields no documents.
        """
        pipeline = self.prepare_query(target_year, target_month)

        fetched_data = self.collection.aggregate(pipeline)

        fetched_data = list(fetched_data)

        return fetched_data[-1]

    def query_data(self):
        """Fetch and post-process monthly revenue as of today."""
        today = StatsDateTime.get_today()
        target_month = today.month
        target_year = today.year

        # Query data
        fetched_data = self.collect_data(target_year, target_month)

        return self.process_data(fetched_data)

    def process_data(self, fetched_data):
        """Reshape raw monthly documents into the returned tables.

        Mutates and returns ``fetched_data`` with keys:
        ``month_revenue``, ``this_month_revenue_over_years``,
        ``grand_total_over_years`` (and ``monthly_data`` removed).
        """
        monthly_data = fetched_data['monthly_data']
        # Data is sorted (year, month) descending, so entry 0 holds the
        # most recent month reported.
        target_month = monthly_data[0]['month']
        monthly_df = pd.DataFrame(monthly_data)
        target_month_df = monthly_df[monthly_df['month'] == target_month]
        month_revenue_df = monthly_df.pivot(index='month',
                                            columns='year',
                                            values='revenue')

        grand_total_df = target_month_df.pivot(index='month',
                                               columns='year',
                                               values='grand_total')

        # Fixed: was the pointless f-string f"grand_total" (no placeholders).
        grand_total_df.rename(index={target_month: "grand_total"},
                              inplace=True)
        month_revenue_df = month_revenue_df.sort_index(ascending=False)
        # Prepend the grand-total row on top of the per-month rows.
        month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
                                     axis=0)

        # Columns (years) ordered newest first.
        fetched_data['month_revenue'] = month_revenue_df[sorted(
            month_revenue_df.columns, reverse=True)]
        # 歷年月營收 (this month's revenue across years)
        fetched_data[
            'this_month_revenue_over_years'] = target_month_df.set_index(
                "year")[["revenue", "revenue_increment_ratio", "YoY_1",
                         "YoY_3", "YoY_5", "YoY_10"]].T
        # 歷年營收成長量 (grand-total growth across years)
        fetched_data['grand_total_over_years'] = target_month_df.set_index(
            "year")[["grand_total", "grand_total_increment_ratio",
                     "grand_total_YoY_1", "grand_total_YoY_3",
                     "grand_total_YoY_5", "grand_total_YoY_10"]].T

        fetched_data.pop("monthly_data")

        return fetched_data
|
@@ -0,0 +1,141 @@
|
|
1
|
+
from .base import StatsFetcher, StatsDateTime
|
2
|
+
import importlib.resources as pkg_resources
|
3
|
+
import json
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
from ..utils import StatsDateTime, StatsProcessor
|
7
|
+
import yaml
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
class ProfitLoseFetcher(StatsFetcher):
    """
    iFa.ai: 財務分析 -> 損益表 (income statement fetcher).

    Queries one ticker's seasonal profit/lose documents and reshapes
    them into the tables configured in ``profit_lose.yaml``.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

        # Per-table slicing configuration: mode / target_index per output.
        self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")

    def prepare_query(self, target_season):
        """Build the pipeline selecting only ``target_season`` entries,
        sorted by year descending.

        NOTE(review): removed a dead local ``target_query`` dict here —
        it was never referenced and contained a malformed
        "$$$$target_season_data.balance_sheet" path.
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                "profit_loses": {
                    "$sortArray": {
                        "input": {
                            "$map": {
                                "input": {
                                    "$filter": {
                                        "input": "$seasonal_data",
                                        "as": "season",
                                        "cond": {
                                            "$eq":
                                            ["$$season.season", target_season]
                                        }
                                    }
                                },
                                "as": "target_season_data",
                                "in": {
                                    "year":
                                    "$$target_season_data.year",
                                    "season":
                                    "$$target_season_data.season",
                                    "profit_lose":
                                    "$$target_season_data.profit_lose"
                                }
                            }
                        },
                        "sortBy": {
                            "year": -1
                        }  # 按 year 降序排序 (sort by year, descending)
                    }
                }
            }
        })

        return pipeline

    def collect_data(self, target_season):
        """Run the pipeline and return the last aggregated document.

        Raises IndexError when the aggregation yields no documents.
        """
        pipeline = self.prepare_query(target_season)

        fetched_data = self.collection.aggregate(pipeline)

        return list(fetched_data)[-1]

    def query_data(self):
        """Fetch and post-process data for the most recent closed season."""
        today = StatsDateTime.get_today()

        # The current season is still open; use the previous one
        # (wrapping Q1 back to last year's Q4).
        target_season = today.season
        target_season = target_season - 1 if target_season > 1 else 4

        fetched_data = self.collect_data(target_season)

        return self.process_data(fetched_data, target_season)

    def process_data(self, fetched_data, target_season):
        """Pivot per-year documents into a MultiIndex-column table keyed
        by (``{year}Q{season}``, metric) and slice it per table_settings.

        NOTE(review): removed unused locals ``index_names`` and
        ``grand_total_dict`` — both were written but never read.
        """
        profit_loses = fetched_data['profit_loses']

        table_dict = dict()

        return_dict = {
            "ticker": fetched_data['ticker'],
            "company_name": fetched_data['company_name'],
        }

        for data in profit_loses:
            year = data['year']

            time_index = f"{year}Q{target_season}"

            profit_lose = data['profit_lose']

            for index_name, value_dict in profit_lose.items():
                # cell key: (2020Q1, 項目, 金額或%)
                for item_name, item in value_dict.items():
                    if (item_name == 'percentage'):
                        if (isinstance(item, (float, int))):
                            item = np.round(item, 2)
                    if ('YoY' in item_name):
                        # YoY ratios are stored as fractions; report as %.
                        if (isinstance(item, (float, int))):
                            item = np.round(item * 100, 2)
                    try:
                        table_dict[index_name][(time_index, item_name)] = item
                    except KeyError:
                        # First value seen for this row: the KeyError can
                        # only come from the missing index_name bucket.
                        table_dict[index_name] = dict()
                        table_dict[index_name][(time_index, item_name)] = item

        total_table = pd.DataFrame.from_dict(table_dict, orient='index')
        total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)

        total_table = total_table.replace("N/A", None)

        for name, setting in self.table_settings.items():
            return_dict[name] = StatsProcessor.slice_multi_col_table(
                total_table=total_table,
                mode=setting['mode'],
                target_index=setting['target_index']
                if "target_index" in setting.keys() else None)

        return return_dict
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Slicing configuration for the balance-sheet fetcher.
# Each top-level key becomes one output table; `mode` selects which
# metric columns to keep (see target_metric_dict in utils/data_process.py)
# and `target_index` is a space-separated list of row labels to keep.
balance_sheet:
  mode: value_and_percentage

total_asset:
  mode: value_and_percentage
  target_index: 資產總額 負債總額 權益總額

current_asset:
  mode: value_and_percentage
  target_index: 流動資產合計

non_current_asset:
  mode: value_and_percentage
  target_index: 非流動資產合計

current_debt:
  mode: value_and_percentage
  target_index: 流動負債合計

non_current_debt:
  mode: value_and_percentage
  target_index: 非流動負債合計

equity:
  mode: value_and_percentage
  target_index: 權益總額
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# 注意此並非用於slicing (note: not used for slicing)
# Grouping of cash-flow statement line items by activity section.
# `main_index` is the section's total row; `index` lists its member rows.
CASHO:
  main_index: 營業活動之淨現金流入(流出)
  index:
    - 繼續營業單位稅前淨利(淨損)
    - 收益費損項目合計
    - 折舊費用
    - 攤銷費用
    - 與營業活動相關之資產及負債之淨變動合計
    - 營業活動之淨現金流入(流出)

CASHI:
  main_index: 投資活動之淨現金流入(流出)
  index:
    - 投資活動之淨現金流入(流出)
    - 取得不動產、廠房及設備
    - 處分不動產、廠房及設備
    - 取得無形資產
    - 處分無形資產
    - 取得透過損益按公允價值衡量之金融資產
    - 處分透過損益按公允價值衡量之金融資產
    - 取得透過其他綜合損益按公允價值衡量之金融資產
    - 處分透過其他綜合損益按公允價值衡量之金融資產
    - 取得按攤銷後成本衡量之金融資產
    - 處分按攤銷後成本衡量之金融資產
    - 按攤銷後成本衡量之金融資產到期還本

# Fixed: this financing-activities section was previously also keyed
# "CASHO"; YAML keeps the last duplicate mapping key, which silently
# dropped the operating-activities section above.
CASHF:
  main_index: 籌資活動之淨現金流入(流出)
  index:
    - 籌資活動之淨現金流入(流出)
    - 短期借款增加
    - 短期借款減少
    - 發行公司債
    - 償還公司債
    - 舉借長期借款
    - 償還長期借款
    - 發放現金股利
    - 庫藏股票買回成本
|
@@ -15,22 +15,22 @@ net_income:
|
|
15
15
|
field: 本期淨利(淨損)
|
16
16
|
value: value
|
17
17
|
|
18
|
+
tax_fee:
|
19
|
+
field: 所得稅費用(利益)合計
|
20
|
+
value: value
|
21
|
+
|
18
22
|
# TODO: 以下所爬到的資料都是累計的,Ifa有額外計算當季的變化量
|
19
23
|
operating_cash_flow:
|
20
24
|
field: 營業活動之淨現金流入(流出)
|
21
|
-
value:
|
25
|
+
value: single_season_value
|
22
26
|
|
23
27
|
invest_cash_flow:
|
24
28
|
field: 投資活動之淨現金流入(流出)
|
25
|
-
value:
|
29
|
+
value: single_season_value
|
26
30
|
|
27
31
|
financing_cash_flow:
|
28
32
|
field: 籌資活動之淨現金流入(流出)
|
29
|
-
value:
|
30
|
-
|
31
|
-
fcf:
|
32
|
-
field: 本期現金及約當現金增加(減少)數
|
33
|
-
value: value
|
33
|
+
value: single_season_value
|
34
34
|
# ^^^ 以上皆需要額外在DataBase處理
|
35
35
|
|
36
36
|
# 每股財務狀況
|
@@ -84,7 +84,7 @@ net_income_YoY:
|
|
84
84
|
|
85
85
|
operating_cash_flow_YoY:
|
86
86
|
field: 營業活動之淨現金流入(流出)
|
87
|
-
value:
|
87
|
+
value: single_season_YoY
|
88
88
|
|
89
89
|
# operating_cash_flow_per_share_YoY:
|
90
90
|
# field: 每股營業現金流年成長率
|
@@ -123,6 +123,13 @@ total_liabilities:
|
|
123
123
|
field: 負債總額
|
124
124
|
value: value
|
125
125
|
|
126
|
+
short_term_liabilities:
|
127
|
+
field: 短期借款
|
128
|
+
value: value
|
129
|
+
|
130
|
+
long_term_liabilities:
|
131
|
+
field: 長期借款
|
132
|
+
value: value
|
126
133
|
#
|
127
134
|
cash_and_cash_equivalents:
|
128
135
|
field: 現金及約當現金
|
@@ -1,18 +1,164 @@
|
|
1
1
|
from importlib.resources import files
|
2
2
|
import json
|
3
|
+
import numpy as np
|
4
|
+
import pandas as pd
|
3
5
|
import yaml
|
4
6
|
|
7
|
+
# Maps a slicing "mode" name (as used in the tools/*.yaml configs) to the
# metric column suffixes that mode selects from a statement table.
target_metric_dict = {
    'value': ['value'],
    'value_and_percentage': ['value', 'percentage'],
    'percentage': ['percentage'],
    'grand_total': ['grand_total'],
    'grand_total_values': ['grand_total', 'grand_total_percentage'],
    'grand_total_percentage': ['grand_total_percentage'],
    # year-over-year growth over 1/3/5/10-year horizons
    'growth': [f'YoY_{i}' for i in [1, 3, 5, 10]],
    'grand_total_growth': [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
}
|
17
|
+
|
18
|
+
|
5
19
|
class StatsProcessor:
    """Static helpers for loading packaged resources and for reshaping /
    slicing financial-statement tables."""

    @classmethod
    def load_txt(cls, filename, json_load=True):
        """Load a text resource bundled in ``neurostats_API.tools``.

        filename: resource file name.
        json_load: parse the content as JSON when True, else return raw text.
        """
        txt_path = files('neurostats_API.tools').joinpath(filename)
        with open(txt_path, 'r', encoding='utf-8') as f:
            data = json.load(f) if json_load else f.read()
        return data

    @classmethod
    def load_yaml(cls, filename):
        """Load and parse a YAML resource bundled in ``neurostats_API.tools``."""
        yaml_path = files('neurostats_API.tools').joinpath(filename)
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        return data

    @classmethod
    def expand_value_percentage(cls, dataframe):
        """Expand dict-valued cells (e.g. {'value': .., 'percentage': ..})
        into flat columns named "<col>_<key>", preserving the index."""
        expanded_columns = {}
        for col in dataframe.columns:
            # json_normalize splits each dict cell into its own columns.
            expanded_df = pd.json_normalize(
                dataframe[col]).add_prefix(f"{col}_")
            expanded_df.index = dataframe.index
            expanded_columns[col] = expanded_df

        return pd.concat(expanded_columns.values(), axis=1)

    @classmethod
    def slice_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: specific row label(s) to keep
    ):
        """Slice a single-level-column table whose columns are named
        "<time>_<metric>"; keep only the metrics selected by ``mode``.

        NOTE(review): on an unknown mode this *returns an error string*
        instead of raising — preserved for existing callers.
        NOTE(review): removed a redundant double transpose (the original
        stored ``….T`` and then returned ``sliced_table.T``, a no-op).
        """
        times = [
            column.split("_")[0] for column in total_table.columns.unique()
        ]  # extract the time part of each column name
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [
            f"{time}_{value_name}" for time in times
            for value_name in target_metrics
        ]

        if target_index:
            # target_index is a space-separated string of row labels.
            return total_table.loc[target_index.split(), desired_order]
        else:
            return total_table.loc[:, desired_order]

    @classmethod
    def slice_multi_col_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: specific row label(s) to keep
    ):
        """Slice a MultiIndex-column table (levels: time, metric), keeping
        only the metrics selected by ``mode``.

        NOTE(review): on an unknown mode this *returns an error string*
        instead of raising — preserved for existing callers.
        """
        times = total_table.columns.get_level_values(0).unique()
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [(time, value_name) for time in times
                         for value_name in target_metrics]

        if target_index:
            target_index = target_index.split()
            sliced_table = total_table.loc[
                target_index, pd.IndexSlice[:,
                                            target_metrics]][desired_order].T
            if mode == 'value_and_percentage':
                # Flatten columns for the balance-sheet page format.
                return_table = sliced_table.T
                return_table.columns = [
                    "_".join(flatten_indexs)
                    for flatten_indexs in return_table.columns.to_flat_index()
                ]
                return return_table

            # Otherwise pivot to rows=metric, columns=time (newest first).
            sliced_table = sliced_table.reset_index()
            sliced_table = sliced_table.pivot(index='level_1',
                                              columns='level_0',
                                              values=target_index).sort_index(
                                                  axis=1,
                                                  level=1,
                                                  ascending=False)

            sliced_table.columns = sliced_table.columns.get_level_values(1)
            sliced_table.columns.name = None
            sliced_table.index.name = None

            return sliced_table.reindex(target_metrics)

        else:
            return_table = total_table.loc[:, pd.IndexSlice[:,
                                                            target_metrics]][
                                                                desired_order]
            return_table.columns = [
                "_".join(flatten_indexs)
                for flatten_indexs in return_table.columns.to_flat_index()
            ]
            return return_table

    @classmethod
    def cal_percentage(cls, value, postfix="%"):
        """Format a ratio as a "xx.xx%" string (value * 100, 2 decimals);
        non-numeric values pass through unchanged."""
        if isinstance(value, (float, int)):
            value = np.round(value * 100, 2).item()
            value = f"{value:.2f}{postfix}"
        return value

    @classmethod
    def cal_non_percentage(cls, value, to_str=False, postfix="元"):
        """Round a numeric value to 2 decimals; when ``to_str`` is True,
        format it as "xx.xx<postfix>". Non-numerics pass through."""
        if isinstance(value, (float, int)):
            value = np.round(value, 2).item()
            if to_str:
                value = f"{value:.2f}{postfix}"
        return value
|