neurostats-API 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,92 @@
1
+ from .base import StatsFetcher, StatsDateTime
2
+ import json
3
+ import pandas as pd
4
+ from ..utils import StatsDateTime, StatsProcessor
5
+ import importlib.resources as pkg_resources
6
+ import yaml
7
+
8
+
9
class MonthRevenueFetcher(StatsFetcher):
    """
    iFa.ai: financial analysis -> monthly revenue (每月營收)

    Fetches the company's monthly-revenue document from MongoDB and
    reshapes it into pivot tables (month x year) for presentation.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

    def prepare_query(self, target_year, target_month):
        """Build the aggregation pipeline.

        ``target_year``/``target_month`` are accepted for interface
        compatibility but are not used by the query itself: the pipeline
        simply sorts ``monthly_data`` by (year, month) descending.
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                "monthly_data": {
                    "$sortArray": {
                        "input": "$monthly_data",
                        "sortBy": {
                            "year": -1,
                            "month": -1
                        }
                    }
                },
            }
        })

        return pipeline

    def collect_data(self, target_year, target_month):
        """Run the aggregation and return the last matched document.

        Raises:
            ValueError: if the aggregation matches no document (previously
                this surfaced as an opaque ``IndexError`` on ``[-1]``).
        """
        pipeline = self.prepare_query(target_year, target_month)

        fetched_data = list(self.collection.aggregate(pipeline))

        if not fetched_data:
            raise ValueError(
                "No monthly revenue data found for the given ticker")

        return fetched_data[-1]

    def query_data(self):
        """Query and post-process the most recent month's revenue data."""
        today = StatsDateTime.get_today()
        target_month = today.month
        target_year = today.year

        # Query data
        fetched_data = self.collect_data(target_year, target_month)

        return self.process_data(fetched_data)

    def process_data(self, fetched_data):
        """Pivot the raw monthly data into presentation tables.

        Adds to ``fetched_data`` (and removes the raw ``monthly_data``):
            month_revenue: month x year revenue table, plus a
                ``grand_total`` row holding the latest month's cumulative
                revenue; year columns ordered newest first.
            this_month_revenue_over_years: latest month's revenue and
                growth ratios across years (metrics as rows).
            grand_total_over_years: latest month's cumulative revenue and
                growth ratios across years (metrics as rows).
        """
        monthly_data = fetched_data['monthly_data']
        # monthly_data is sorted descending, so index 0 is the latest month.
        target_month = monthly_data[0]['month']
        monthly_df = pd.DataFrame(monthly_data)
        target_month_df = monthly_df[monthly_df['month'] == target_month]
        month_revenue_df = monthly_df.pivot(index='month',
                                            columns='year',
                                            values='revenue')

        grand_total_df = target_month_df.pivot(index='month',
                                               columns='year',
                                               values='grand_total')

        # Plain string literal (was a needless f-string).
        grand_total_df.rename(index={target_month: "grand_total"},
                              inplace=True)
        month_revenue_df = month_revenue_df.sort_index(ascending=False)
        month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
                                     axis=0)

        # Year columns ordered newest first.
        fetched_data['month_revenue'] = month_revenue_df[sorted(
            month_revenue_df.columns, reverse=True)]
        # Monthly revenue of the latest month over the years
        fetched_data[
            'this_month_revenue_over_years'] = target_month_df.set_index(
                "year")[["revenue", "revenue_increment_ratio", "YoY_1",
                         "YoY_3", "YoY_5", "YoY_10"]].T
        # Cumulative (grand-total) revenue growth over the years
        fetched_data['grand_total_over_years'] = target_month_df.set_index(
            "year")[["grand_total", "grand_total_increment_ratio",
                     "grand_total_YoY_1", "grand_total_YoY_3",
                     "grand_total_YoY_5", "grand_total_YoY_10"]].T

        fetched_data.pop("monthly_data")

        return fetched_data
@@ -0,0 +1,141 @@
1
+ from .base import StatsFetcher, StatsDateTime
2
+ import importlib.resources as pkg_resources
3
+ import json
4
+ import numpy as np
5
+ import pandas as pd
6
+ from ..utils import StatsDateTime, StatsProcessor
7
+ import yaml
8
+
9
+
10
+
11
class ProfitLoseFetcher(StatsFetcher):
    """
    iFa.ai: financial analysis -> income statement (損益表)

    Fetches seasonal profit-and-loss documents and slices them into the
    tables configured in ``profit_lose.yaml``.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

        self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")

    def prepare_query(self, target_season):
        """Build a pipeline keeping only ``target_season`` entries of
        ``seasonal_data``, projected to (year, season, profit_lose) and
        sorted by year descending.

        NOTE: removed an unused leftover ``target_query`` dict that
        referenced ``$$$$target_season_data.balance_sheet`` (quadruple
        ``$`` and a balance-sheet field in the P&L fetcher) — dead,
        copy-paste residue.
        """
        pipeline = super().prepare_query()

        pipeline.append({
            "$project": {
                "_id": 0,
                "ticker": 1,
                "company_name": 1,
                "profit_loses": {
                    "$sortArray": {
                        "input": {
                            "$map": {
                                "input": {
                                    "$filter": {
                                        "input": "$seasonal_data",
                                        "as": "season",
                                        "cond": {
                                            "$eq": [
                                                "$$season.season",
                                                target_season
                                            ]
                                        }
                                    }
                                },
                                "as": "target_season_data",
                                "in": {
                                    "year":
                                    "$$target_season_data.year",
                                    "season":
                                    "$$target_season_data.season",
                                    "profit_lose":
                                    "$$target_season_data.profit_lose"
                                }
                            }
                        },
                        "sortBy": {
                            "year": -1
                        }  # sort by year, descending
                    }
                }
            }
        })

        return pipeline

    def collect_data(self, target_season):
        """Run the aggregation and return the last matched document.

        Raises:
            ValueError: if the aggregation matches no document (previously
                this surfaced as an opaque ``IndexError`` on ``[-1]``).
        """
        pipeline = self.prepare_query(target_season)

        fetched_data = list(self.collection.aggregate(pipeline))

        if not fetched_data:
            raise ValueError(
                "No profit-lose data found for the given ticker")

        return fetched_data[-1]

    def query_data(self):
        """Query the latest completed season's P&L and post-process it."""
        today = StatsDateTime.get_today()

        # Use the previous season; season 1 wraps back to season 4.
        target_season = today.season
        target_season = target_season - 1 if target_season > 1 else 4

        fetched_data = self.collect_data(target_season)

        return self.process_data(fetched_data, target_season)

    def process_data(self, fetched_data, target_season):
        """Flatten the per-year P&L dicts into a table whose columns are a
        (time, metric) MultiIndex, then slice it per ``table_settings``.

        Percentages are rounded to 2 decimals; YoY ratios are converted to
        percent (x100) and rounded.
        """
        profit_loses = fetched_data['profit_loses']

        table_dict = dict()

        return_dict = {
            "ticker": fetched_data['ticker'],
            "company_name": fetched_data['company_name'],
        }

        for data in profit_loses:
            year = data['year']

            time_index = f"{year}Q{target_season}"

            profit_lose = data['profit_lose']

            for index_name, value_dict in profit_lose.items():
                # Cell key: (e.g. 2020Q1, item_name) -> value or ratio
                for item_name, item in value_dict.items():
                    if item_name == 'percentage':
                        if isinstance(item, (float, int)):
                            item = np.round(item, 2)
                    if 'YoY' in item_name:
                        if isinstance(item, (float, int)):
                            item = np.round(item * 100, 2)
                    # setdefault replaces the original try/except KeyError
                    # insertion; same behavior, single code path.
                    table_dict.setdefault(
                        index_name, dict())[(time_index, item_name)] = item

        total_table = pd.DataFrame.from_dict(table_dict, orient='index')
        total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)

        total_table = total_table.replace("N/A", None)

        for name, setting in self.table_settings.items():
            return_dict[name] = StatsProcessor.slice_multi_col_table(
                total_table=total_table,
                mode=setting['mode'],
                target_index=setting.get('target_index'))

        return return_dict
@@ -0,0 +1,26 @@
1
+ balance_sheet:
2
+ mode: value_and_percentage
3
+
4
+ total_asset:
5
+ mode: value_and_percentage
6
+ target_index: 資產總額 負債總額 權益總額
7
+
8
+ current_asset:
9
+ mode: value_and_percentage
10
+ target_index: 流動資產合計
11
+
12
+ non_current_asset:
13
+ mode: value_and_percentage
14
+ target_index: 非流動資產合計
15
+
16
+ current_debt:
17
+ mode: value_and_percentage
18
+ target_index: 流動負債合計
19
+
20
+ non_current_debt:
21
+ mode: value_and_percentage
22
+ target_index: 非流動負債合計
23
+
24
+ equity:
25
+ mode: value_and_percentage
26
+ target_index: 權益總額
@@ -0,0 +1,39 @@
1
+ # 注意此並非用於slicing
2
+ CASHO:
3
+ main_index: 營業活動之淨現金流入(流出)
4
+ index:
5
+ - 繼續營業單位稅前淨利(淨損)
6
+ - 收益費損項目合計
7
+ - 折舊費用
8
+ - 攤銷費用
9
+ - 與營業活動相關之資產及負債之淨變動合計
10
+ - 營業活動之淨現金流入(流出)
11
+
12
+ CASHI:
13
+ main_index: 投資活動之淨現金流入(流出)
14
+ index:
15
+ - 投資活動之淨現金流入(流出)
16
+ - 取得不動產、廠房及設備
17
+ - 處分不動產、廠房及設備
18
+ - 取得無形資產
19
+ - 處分無形資產
20
+ - 取得透過損益按公允價值衡量之金融資產
21
+ - 處分透過損益按公允價值衡量之金融資產
22
+ - 取得透過其他綜合損益按公允價值衡量之金融資產
23
+ - 處分透過其他綜合損益按公允價值衡量之金融資產
24
+ - 取得按攤銷後成本衡量之金融資產
25
+ - 處分按攤銷後成本衡量之金融資產
26
+ - 按攤銷後成本衡量之金融資產到期還本
27
+
28
+ CASHF:
29
+ main_index: 籌資活動之淨現金流入(流出)
30
+ index:
31
+ - 籌資活動之淨現金流入(流出)
32
+ - 短期借款增加
33
+ - 短期借款減少
34
+ - 發行公司債
35
+ - 償還公司債
36
+ - 舉借長期借款
37
+ - 償還長期借款
38
+ - 發放現金股利
39
+ - 庫藏股票買回成本
@@ -15,22 +15,22 @@ net_income:
15
15
  field: 本期淨利(淨損)
16
16
  value: value
17
17
 
18
+ tax_fee:
19
+ field: 所得稅費用(利益)合計
20
+ value: value
21
+
18
22
  # TODO: 以下所爬到的資料都是累計的,Ifa有額外計算當季的變化量
19
23
  operating_cash_flow:
20
24
  field: 營業活動之淨現金流入(流出)
21
- value: value
25
+ value: single_season_value
22
26
 
23
27
  invest_cash_flow:
24
28
  field: 投資活動之淨現金流入(流出)
25
- value: value
29
+ value: single_season_value
26
30
 
27
31
  financing_cash_flow:
28
32
  field: 籌資活動之淨現金流入(流出)
29
- value: value
30
-
31
- fcf:
32
- field: 本期現金及約當現金增加(減少)數
33
- value: value
33
+ value: single_season_value
34
34
  # ^^^ 以上皆需要額外在DataBase處理
35
35
 
36
36
  # 每股財務狀況
@@ -84,7 +84,7 @@ net_income_YoY:
84
84
 
85
85
  operating_cash_flow_YoY:
86
86
  field: 營業活動之淨現金流入(流出)
87
- value: YoY_1
87
+ value: single_season_YoY
88
88
 
89
89
  # operating_cash_flow_per_share_YoY:
90
90
  # field: 每股營業現金流年成長率
@@ -123,6 +123,13 @@ total_liabilities:
123
123
  field: 負債總額
124
124
  value: value
125
125
 
126
+ short_term_liabilities:
127
+ field: 短期借款
128
+ value: value
129
+
130
+ long_term_liabilities:
131
+ field: 長期借款
132
+ value: value
126
133
  #
127
134
  cash_and_cash_equivalents:
128
135
  field: 現金及約當現金
@@ -9,7 +9,7 @@ revenue:
9
9
  target_index: 營業收入合計
10
10
 
11
11
  grand_total_revenue:
12
- mode: grand_total_values
12
+ mode: grand_total_growth
13
13
  target_index: 營業收入合計
14
14
 
15
15
  gross_profit:
@@ -23,6 +23,7 @@
23
23
  "非流動資產合計": "balance_sheet",
24
24
  "資產總額": "balance_sheet",
25
25
  "短期借款": "balance_sheet",
26
+ "長期借款": "balance_sheet",
26
27
  "透過損益按公允價值衡量之金融負債-流動": "balance_sheet",
27
28
  "應付票據": "balance_sheet",
28
29
  "應付帳款": "balance_sheet",
@@ -1,18 +1,164 @@
1
1
  from importlib.resources import files
2
2
  import json
3
+ import numpy as np
4
+ import pandas as pd
3
5
  import yaml
4
6
 
7
# Maps a slicing "mode" to the metric column names it selects.
target_metric_dict = {
    'value': ['value'],
    'value_and_percentage': ['value', 'percentage'],
    'percentage': ['percentage'],
    'grand_total': ['grand_total'],
    'grand_total_values': ['grand_total', 'grand_total_percentage'],
    'grand_total_percentage': ['grand_total_percentage'],
    'growth': ['YoY_1', 'YoY_3', 'YoY_5', 'YoY_10'],
    'grand_total_growth': [
        'grand_total_YoY_1', 'grand_total_YoY_3',
        'grand_total_YoY_5', 'grand_total_YoY_10'
    ],
}
17
+
18
+
5
19
class StatsProcessor:
    """Static helpers for loading packaged config files and for slicing
    the (time, metric) tables produced by the fetchers."""

    @classmethod
    def load_txt(cls, filename, json_load=True):
        """Load a text resource from ``neurostats_API.tools``.

        Parses it as JSON when ``json_load`` is True, otherwise returns
        the raw file contents.
        """
        txt_path = files('neurostats_API.tools').joinpath(filename)
        with open(txt_path, 'r', encoding='utf-8') as f:
            data = json.load(f) if (json_load) else f.read()
        return data

    @classmethod
    def load_yaml(cls, filename):
        """Load a YAML resource from ``neurostats_API.tools``."""
        yaml_path = files('neurostats_API.tools').joinpath(filename)
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        return data

    @classmethod
    def expand_value_percentage(cls, dataframe):
        """Expand dict-valued cells (e.g. {'value': .., 'percentage': ..})
        into flat ``<col>_value`` / ``<col>_percentage`` columns."""
        expanded_columns = {}
        for col in dataframe.columns:
            # json_normalize splits each dict cell into its own columns.
            expanded_df = pd.json_normalize(
                dataframe[col]).add_prefix(f"{col}_")
            expanded_df.index = dataframe.index
            expanded_columns[col] = expanded_df

        expanded_df = pd.concat(expanded_columns.values(), axis=1)

        return expanded_df

    @classmethod
    def slice_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: specific index rows to keep
    ):
        """Slice a single-level table whose columns are ``<time>_<metric>``.

        Returns the columns selected by ``mode`` (see
        ``target_metric_dict``), optionally restricted to the
        whitespace-separated row labels in ``target_index``.

        NOTE: on an unknown ``mode`` this returns an error *string*
        instead of raising — legacy behavior kept for existing callers.
        """
        # Deduplicate time prefixes while preserving order: several
        # columns share one prefix (e.g. "2020Q1_value",
        # "2020Q1_percentage"), and a duplicated time entry would
        # duplicate columns in the sliced result.
        times = list(
            dict.fromkeys(
                column.split("_")[0] for column in total_table.columns))
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [
            f"{time}_{value_name}" for time in times
            for value_name in target_metrics
        ]

        if (target_index):
            target_index = target_index.split()
            # (The original transposed and transposed back — a no-op.)
            return total_table.loc[target_index, desired_order]

        else:
            return total_table.loc[:, desired_order]

    @classmethod
    def slice_multi_col_table(
        cls,
        total_table,
        mode='value',
        target_index=None,  # None or str: specific index rows to keep
    ):
        """Slice a table whose columns are a (time, metric) MultiIndex.

        With ``target_index`` the result is pivoted to metric-rows x
        time-columns (newest time first); ``value_and_percentage`` mode
        instead keeps the flattened ``<time>_<metric>`` wide layout used
        by the balance-sheet page.

        NOTE: on an unknown ``mode`` this returns an error *string*
        instead of raising — legacy behavior kept for existing callers.
        """
        times = total_table.columns.get_level_values(0).unique()
        try:
            target_metrics = target_metric_dict[mode]
        except KeyError:
            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"

        desired_order = [(time, value_name) for time in times
                         for value_name in target_metrics]

        if (target_index):
            target_index = target_index.split()
            sliced_table = total_table.loc[
                target_index,
                pd.IndexSlice[:, target_metrics]][desired_order].T
            if (mode == 'value_and_percentage'):
                # Balance-sheet page format: flat "<time>_<metric>" columns.
                return_table = sliced_table.T
                return_table.columns = [
                    "_".join(flatten_indexs)
                    for flatten_indexs in return_table.columns.to_flat_index()
                ]
                return return_table

            # Pivot to metric rows x time columns, newest time first.
            sliced_table = sliced_table.reset_index()
            sliced_table = sliced_table.pivot(index='level_1',
                                              columns='level_0',
                                              values=target_index).sort_index(
                                                  axis=1,
                                                  level=1,
                                                  ascending=False)

            sliced_table.columns = sliced_table.columns.get_level_values(1)
            sliced_table.columns.name = None
            sliced_table.index.name = None

            return sliced_table.reindex(target_metrics)

        else:
            return_table = total_table.loc[:, pd.IndexSlice[:,
                                                            target_metrics]][
                                                                desired_order]
            return_table.columns = [
                "_".join(flatten_indexs)
                for flatten_indexs in return_table.columns.to_flat_index()
            ]
            return return_table

    @classmethod
    def cal_percentage(cls, value, postfix="%"):
        """Format a ratio as a percent string, e.g. 0.1234 -> "12.34%".

        Non-numeric values are returned unchanged.
        """
        if (isinstance(value, (float, int))):
            value = np.round(value * 100, 2).item()
            return f"{value:.2f}{postfix}"

        return value

    @classmethod
    def cal_non_percentage(cls, value, to_str=False, postfix="元"):
        """Round a numeric value to 2 decimals; with ``to_str`` format it
        as a string with ``postfix`` (currency unit). Non-numeric values
        are returned unchanged."""
        if (isinstance(value, (float, int))):
            value = np.round(value, 2).item()
            if (to_str):
                return f"{value:.2f}{postfix}"
            return value

        return value