neurostats-API 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -0,0 +1,92 @@
+from .base import StatsFetcher, StatsDateTime
+import json
+import pandas as pd
+from ..utils import StatsDateTime, StatsProcessor
+import importlib.resources as pkg_resources
+import yaml
+
+
+class MonthRevenueFetcher(StatsFetcher):
+    """
+    iFa.ai: Financial Analysis -> Monthly Revenue
+    """
+
+    def __init__(self, ticker, db_client):
+        super().__init__(ticker, db_client)
+
+    def prepare_query(self, target_year, target_month):
+        pipeline = super().prepare_query()
+
+        pipeline.append({
+            "$project": {
+                "_id": 0,
+                "ticker": 1,
+                "company_name": 1,
+                "monthly_data": {
+                    "$sortArray": {
+                        "input": "$monthly_data",
+                        "sortBy": {
+                            "year": -1,
+                            "month": -1
+                        }
+                    }
+                },
+            }
+        })
+
+        return pipeline
+
+    def collect_data(self, target_year, target_month):
+        pipeline = self.prepare_query(target_year, target_month)
+
+        fetched_data = self.collection.aggregate(pipeline)
+
+        fetched_data = list(fetched_data)
+
+        return fetched_data[-1]
+
+    def query_data(self):
+        today = StatsDateTime.get_today()
+        target_month = today.month
+        target_year = today.year
+
+        # Query data
+        fetched_data = self.collect_data(target_year, target_month)
+
+        return self.process_data(fetched_data)
+
+    def process_data(self, fetched_data):
+
+        monthly_data = fetched_data['monthly_data']
+        target_month = monthly_data[0]['month']
+        monthly_df = pd.DataFrame(monthly_data)
+        target_month_df = monthly_df[monthly_df['month'] == target_month]
+        month_revenue_df = monthly_df.pivot(index='month',
+                                            columns='year',
+                                            values='revenue')
+
+        grand_total_df = target_month_df.pivot(index='month',
+                                               columns='year',
+                                               values='grand_total')
+
+        grand_total_df.rename(index={target_month: f"grand_total"},
+                              inplace=True)
+        month_revenue_df = month_revenue_df.sort_index(ascending = False)
+        month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
+                                     axis=0)
+
+        fetched_data['month_revenue'] = month_revenue_df[sorted(month_revenue_df.columns, reverse = True)]
+        # monthly revenue across years
+        fetched_data[
+            'this_month_revenue_over_years'] = target_month_df.set_index(
+                "year")[["revenue", "revenue_increment_ratio", "YoY_1",
+                         "YoY_3", "YoY_5", "YoY_10"]].T
+        # cumulative revenue (grand total) across years
+        fetched_data['grand_total_over_years'] = target_month_df.set_index(
+            "year")[["grand_total", "grand_total_increment_ratio",
+                     "grand_total_YoY_1", "grand_total_YoY_3",
+                     "grand_total_YoY_5", "grand_total_YoY_10"]].T
+
+        fetched_data.pop("monthly_data")
+
+        return fetched_data
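
For orientation, the `process_data` method added above is essentially a pandas reshaping step: pivot the monthly rows into a month × year table and stack the latest month's year-to-date total on top. The sketch below is standalone and uses invented numbers (no MongoDB involved); only the pivot-and-concat logic is taken from the code above.

```python
import pandas as pd

# Toy stand-in for fetched_data['monthly_data'] (values are made up).
monthly_data = [
    {"year": 2024, "month": 10, "revenue": 120, "grand_total": 1150},
    {"year": 2023, "month": 10, "revenue": 100, "grand_total": 980},
    {"year": 2024, "month": 9, "revenue": 110, "grand_total": 1030},
    {"year": 2023, "month": 9, "revenue": 95, "grand_total": 880},
]

monthly_df = pd.DataFrame(monthly_data)
target_month = monthly_data[0]["month"]  # newest record's month, here 10
target_month_df = monthly_df[monthly_df["month"] == target_month]

# month x year revenue table, plus the latest month's year-to-date totals
month_revenue_df = monthly_df.pivot(index="month", columns="year", values="revenue")
grand_total_df = target_month_df.pivot(index="month", columns="year", values="grand_total")
grand_total_df = grand_total_df.rename(index={target_month: "grand_total"})

table = pd.concat([grand_total_df, month_revenue_df.sort_index(ascending=False)], axis=0)
print(table[sorted(table.columns, reverse=True)])  # newest year first
```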
@@ -0,0 +1,141 @@
+from .base import StatsFetcher, StatsDateTime
+import importlib.resources as pkg_resources
+import json
+import numpy as np
+import pandas as pd
+from ..utils import StatsDateTime, StatsProcessor
+import yaml
+
+
+
+class ProfitLoseFetcher(StatsFetcher):
+    """
+    iFa.ai: Financial Analysis -> Income Statement
+    """
+
+    def __init__(self, ticker, db_client):
+        super().__init__(ticker, db_client)
+
+        self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")
+
+    def prepare_query(self, target_season):
+        pipeline = super().prepare_query()
+
+        target_query = {
+            "year": "$$target_season_data.year",
+            "season": "$$target_season_data.season",
+            "balance_sheet": "$$target_season_data.balance_sheet"
+        }
+
+        pipeline.append({
+            "$project": {
+                "_id": 0,
+                "ticker": 1,
+                "company_name": 1,
+                "profit_loses": {
+                    "$sortArray": {
+                        "input": {
+                            "$map": {
+                                "input": {
+                                    "$filter": {
+                                        "input": "$seasonal_data",
+                                        "as": "season",
+                                        "cond": {
+                                            "$eq":
+                                            ["$$season.season", target_season]
+                                        }
+                                    }
+                                },
+                                "as": "target_season_data",
+                                "in": {
+                                    "year":
+                                    "$$target_season_data.year",
+                                    "season":
+                                    "$$target_season_data.season",
+                                    "profit_lose":
+                                    "$$target_season_data.profit_lose"
+                                }
+                            }
+                        },
+                        "sortBy": {
+                            "year": -1
+                        }  # sort by year in descending order
+                    }
+                }
+            }
+        })
+
+        return pipeline
+
+    def collect_data(self, target_season):
+        pipeline = self.prepare_query(target_season)
+
+        fetched_data = self.collection.aggregate(pipeline)
+
+        return list(fetched_data)[-1]
+
+    def query_data(self):
+        today = StatsDateTime.get_today()
+
+        target_season = today.season
+        target_season = target_season - 1 if target_season > 1 else 4
+
+        fetched_data = self.collect_data(target_season)
+
+        return self.process_data(fetched_data, target_season)
+
+    def process_data(self, fetched_data, target_season):
+
+        profit_loses = fetched_data['profit_loses']
+
+        index_names = []
+
+        table_dict = dict()
+        grand_total_dict = dict()
+
+        return_dict = {
+            "ticker": fetched_data['ticker'],
+            "company_name": fetched_data['company_name'],
+        }
+
+        for data in profit_loses:
+            year = data['year']
+
+            time_index = f"{year}Q{target_season}"
+
+            # collect all of the keys
+            index_names += list(data['profit_lose'].keys())
+            profit_lose = data['profit_lose']
+
+            for index_name, value_dict in profit_lose.items():
+                # (2020Q1, item, amount or %)
+                for item_name, item in value_dict.items():
+                    if (item_name == 'percentage'):
+                        if (isinstance(item, (float, int))):
+                            item = np.round(item, 2)
+                    if ('YoY' in item_name):
+                        if (isinstance(item, (float, int))):
+                            item = np.round(item * 100, 2)
+                    try:
+                        table_dict[index_name][(time_index, item_name)] = item
+
+                    except KeyError:
+                        if (index_name not in table_dict.keys()):
+                            table_dict[index_name] = dict()
+                            grand_total_dict[index_name] = dict()
+
+                        table_dict[index_name][(time_index, item_name)] = item
+
+        total_table = pd.DataFrame.from_dict(table_dict, orient='index')
+        total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)
+
+        total_table = total_table.replace("N/A", None)
+
+        for name, setting in self.table_settings.items():
+            return_dict[name] = StatsProcessor.slice_multi_col_table(
+                total_table=total_table,
+                mode=setting['mode'],
+                target_index=setting['target_index']
+                if "target_index" in setting.keys() else None)
+
+        return return_dict
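
The core of `process_data` here is flattening the nested `profit_lose` dicts into a DataFrame whose columns are (season, metric) pairs; `slice_multi_col_table`, added to the utils later in this diff, then slices that table according to `profit_lose.yaml`. A toy illustration with invented figures (the row labels are statement field names used by the package):

```python
import pandas as pd

# Shape of table_dict after the loop above: {row label: {(time, metric): value}}
table_dict = {
    "營業收入合計": {("2024Q3", "value"): 1000, ("2024Q3", "percentage"): 100.0,
                    ("2023Q3", "value"): 900, ("2023Q3", "percentage"): 100.0},
    "營業毛利(毛損)": {("2024Q3", "value"): 450, ("2024Q3", "percentage"): 45.0,
                      ("2023Q3", "value"): 380, ("2023Q3", "percentage"): 42.2},
}

total_table = pd.DataFrame.from_dict(table_dict, orient="index")
total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)

print(total_table["2024Q3"])  # one season, both metrics, rows = statement items
```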
@@ -0,0 +1,26 @@
+balance_sheet:
+  mode: value_and_percentage
+
+total_asset:
+  mode: value_and_percentage
+  target_index: 資產總額 負債總額 權益總額
+
+current_asset:
+  mode: value_and_percentage
+  target_index: 流動資產合計
+
+non_current_asset:
+  mode: value_and_percentage
+  target_index: 非流動資產合計
+
+current_debt:
+  mode: value_and_percentage
+  target_index: 流動負債合計
+
+non_current_debt:
+  mode: value_and_percentage
+  target_index: 非流動負債合計
+
+equity:
+  mode: value_and_percentage
+  target_index: 權益總額
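
A note on how these settings are consumed: `mode` picks which metric columns to keep (via `target_metric_dict` in the utils change later in this diff), and the space-separated `target_index` string is split into individual row labels at slice time. A minimal sketch of that split, inlining one entry from above:

```python
import yaml

snippet = """
total_asset:
  mode: value_and_percentage
  target_index: 資產總額 負債總額 權益總額
"""
settings = yaml.safe_load(snippet)

print(settings["total_asset"]["mode"])                  # value_and_percentage
print(settings["total_asset"]["target_index"].split())  # ['資產總額', '負債總額', '權益總額']
```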
@@ -0,0 +1,39 @@
+# Note: this file is not used for slicing
+CASHO:
+  main_index: 營業活動之淨現金流入(流出)
+  index:
+    - 繼續營業單位稅前淨利(淨損)
+    - 收益費損項目合計
+    - 折舊費用
+    - 攤銷費用
+    - 與營業活動相關之資產及負債之淨變動合計
+    - 營業活動之淨現金流入(流出)
+
+CASHI:
+  main_index: 投資活動之淨現金流入(流出)
+  index:
+    - 投資活動之淨現金流入(流出)
+    - 取得不動產、廠房及設備
+    - 處分不動產、廠房及設備
+    - 取得無形資產
+    - 處分無形資產
+    - 取得透過損益按公允價值衡量之金融資產
+    - 處分透過損益按公允價值衡量之金融資產
+    - 取得透過其他綜合損益按公允價值衡量之金融資產
+    - 處分透過其他綜合損益按公允價值衡量之金融資產
+    - 取得按攤銷後成本衡量之金融資產
+    - 處分按攤銷後成本衡量之金融資產
+    - 按攤銷後成本衡量之金融資產到期還本
+
+CASHF:
+  main_index: 籌資活動之淨現金流入(流出)
+  index:
+    - 籌資活動之淨現金流入(流出)
+    - 短期借款增加
+    - 短期借款減少
+    - 發行公司債
+    - 償還公司債
+    - 舉借長期借款
+    - 償還長期借款
+    - 發放現金股利
+    - 庫藏股票買回成本
@@ -15,22 +15,22 @@ net_income:
   field: 本期淨利(淨損)
   value: value

+tax_fee:
+  field: 所得稅費用(利益)合計
+  value: value
+
 # TODO: the crawled values below are cumulative; iFa additionally computes the change for the current season
 operating_cash_flow:
   field: 營業活動之淨現金流入(流出)
-  value: value
+  value: single_season_value

 invest_cash_flow:
   field: 投資活動之淨現金流入(流出)
-  value: value
+  value: single_season_value

 financing_cash_flow:
   field: 籌資活動之淨現金流入(流出)
-  value: value
-
-fcf:
-  field: 本期現金及約當現金增加(減少)數
-  value: value
+  value: single_season_value
 # ^^^ all of the above need extra processing on the database side

 # Per-share financial status
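
The switch from `value` to `single_season_value` above relates to the TODO comment: the crawled cash-flow figures are cumulative (year to date), so a single season's figure has to be derived by differencing consecutive cumulative values. That derivation happens on the database side; the arithmetic itself is just the following (illustrative numbers only):

```python
# Year-to-date operating cash flow by season (invented numbers).
cumulative = {1: 120, 2: 260, 3: 430}

# A season's own figure is its cumulative value minus the previous season's.
single_season = {
    season: total - cumulative.get(season - 1, 0)
    for season, total in cumulative.items()
}
print(single_season)  # {1: 120, 2: 140, 3: 170}
```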
@@ -84,7 +84,7 @@ net_income_YoY:

 operating_cash_flow_YoY:
   field: 營業活動之淨現金流入(流出)
-  value: YoY_1
+  value: single_season_YoY

 # operating_cash_flow_per_share_YoY:
 #   field: 每股營業現金流年成長率
@@ -123,6 +123,13 @@ total_liabilities:
   field: 負債總額
   value: value

+short_term_liabilities:
+  field: 短期借款
+  value: value
+
+long_term_liabilities:
+  field: 長期借款
+  value: value
 #
 cash_and_cash_equivalents:
   field: 現金及約當現金
@@ -9,7 +9,7 @@ revenue:
   target_index: 營業收入合計

 grand_total_revenue:
-  mode: grand_total_values
+  mode: grand_total_growth
   target_index: 營業收入合計

 gross_profit:
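
The mode rename above changes which metric columns are sliced for `grand_total_revenue`; the mapping is `target_metric_dict`, added to the utils module later in this diff. The two relevant entries, reproduced here for reference:

```python
# Excerpt of target_metric_dict (see the utils hunk below): what the old and
# new modes expand to.
target_metric_dict = {
    'grand_total_values': ['grand_total', 'grand_total_percentage'],
    'grand_total_growth': [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]],
}
print(target_metric_dict['grand_total_growth'])
# ['grand_total_YoY_1', 'grand_total_YoY_3', 'grand_total_YoY_5', 'grand_total_YoY_10']
```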
@@ -23,6 +23,7 @@
     "非流動資產合計": "balance_sheet",
     "資產總額": "balance_sheet",
     "短期借款": "balance_sheet",
+    "長期借款": "balance_sheet",
     "透過損益按公允價值衡量之金融負債-流動": "balance_sheet",
     "應付票據": "balance_sheet",
     "應付帳款": "balance_sheet",
@@ -1,18 +1,164 @@
 from importlib.resources import files
 import json
+import numpy as np
+import pandas as pd
 import yaml

+target_metric_dict = {
+    'value': ['value'],
+    'value_and_percentage': ['value', 'percentage'],
+    'percentage': ['percentage'],
+    'grand_total': ['grand_total'],
+    'grand_total_values': ['grand_total', 'grand_total_percentage'],
+    'grand_total_percentage': ['grand_total_percentage'],
+    'growth': [f'YoY_{i}' for i in [1, 3, 5, 10]],
+    'grand_total_growth': [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
+}
+
+
 class StatsProcessor:
+
     @classmethod
-    def load_txt(cls, filename, json_load = True):
+    def load_txt(cls, filename, json_load=True):
         txt_path = files('neurostats_API.tools').joinpath(filename)
         with open(txt_path, 'r', encoding='utf-8') as f:
-            data = json.load(f) if (json_load) else f.read()
+            data = json.load(f) if (json_load) else f.read()
         return data
+
     @classmethod
     def load_yaml(cls, filename):
         yaml_path = files('neurostats_API.tools').joinpath(filename)
         with open(yaml_path, 'r', encoding='utf-8') as f:
             data = yaml.safe_load(f)

-        return data
+        return data
+
+    @classmethod
+    def expand_value_percentage(cls, dataframe):
+
+        expanded_columns = {}
+        for col in dataframe.columns:
+            # Use json_normalize to split 'value' and 'percentage'
+            expanded_df = pd.json_normalize(
+                dataframe[col]).add_prefix(f"{col}_")
+            expanded_df.index = dataframe.index
+            # Append the expanded columns to the new DataFrame
+            expanded_columns[col] = expanded_df
+
+        expanded_df = pd.concat(expanded_columns.values(), axis=1)
+
+        return expanded_df
+
+    @classmethod
+    def slice_table(
+        cls,
+        total_table,
+        mode='value',
+        target_index=None,  # None or str: which index to pull out specifically
+    ):
+        """
+        total_table: columns should be named <time>_<metric>
+        For a table with single-level columns, slice out the desired indexes
+        """
+        times = [
+            column.split("_")[0] for column in total_table.columns.unique()
+        ]  # extract the time index
+        try:
+            target_metrics = target_metric_dict[mode]
+        except KeyError as e:
+            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"
+
+        desired_order = [
+            f"{time}_{value_name}" for time in times
+            for value_name in target_metrics
+        ]
+
+        if (target_index):
+            target_index = target_index.split()
+            sliced_table = total_table.loc[target_index, desired_order].T
+
+            return sliced_table.T
+
+        else:
+            return total_table.loc[:, desired_order]
+
+    @classmethod
+    def slice_multi_col_table(
+        cls,
+        total_table,
+        mode='value',
+        target_index=None,  # None or str: which index to pull out specifically
+    ):
+        """
+        Slice the target indexes out of a DataFrame with MultiIndex columns
+        """
+        times = total_table.columns.get_level_values(0).unique()
+        try:
+            target_metrics = target_metric_dict[mode]
+        except KeyError as e:
+            return f"mode Error: Get mode should be {list(target_metric_dict.keys())} but get {mode}"
+
+        desired_order = [(time, value_name) for time in times
+                         for value_name in target_metrics]
+
+        if (target_index):
+            target_index = target_index.split()
+            sliced_table = total_table.loc[
+                target_index, pd.IndexSlice[:,
+                                            target_metrics]][desired_order].T
+            if (mode == 'value_and_percentage'):  # to match the balance_sheet page layout
+                return_table = sliced_table.T
+                return_table.columns = [
+                    "_".join(flatten_indexs)
+                    for flatten_indexs in return_table.columns.to_flat_index()
+                ]
+                return return_table
+
+            sliced_table = sliced_table.reset_index()
+            sliced_table = sliced_table.pivot(index='level_1',
+                                              columns='level_0',
+                                              values=target_index).sort_index(
+                                                  axis=1,
+                                                  level=1,
+                                                  ascending=False)
+
+            sliced_table.columns = sliced_table.columns.get_level_values(1)
+            sliced_table.columns.name = None
+            sliced_table.index.name = None
+
+            return sliced_table.reindex(target_metrics)
+
+        else:
+            return_table = total_table.loc[:, pd.IndexSlice[:,
+                                                            target_metrics]][
+                                                                desired_order]
+            return_table.columns = [
+                "_".join(flatten_indexs)
+                for flatten_indexs in return_table.columns.to_flat_index()
+            ]
+            return return_table
+
+    @classmethod
+    def cal_percentage(cls, value, postfix="%"):
+        if (isinstance(value, (float, int))):
+            value = np.round(value * 100 , 2).item()
+            value = f"{value:.2f}{postfix}"
+
+            return value
+
+        else:
+            return value
+
+    @classmethod
+    def cal_non_percentage(cls, value, to_str=False, postfix="元"):
+        if (isinstance(value, (float, int))):
+            value = np.round(value, 2).item()
+            if (to_str):
+                value = f"{value:.2f}{postfix}"
+                return value
+
+            else:
+                return value
+
+        else:
+            return value
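
To close, a small end-to-end check of the new `slice_multi_col_table` classmethod. It assumes `StatsProcessor` is importable as `neurostats_API.utils.StatsProcessor`, matching the fetchers' `from ..utils import StatsProcessor` in this diff; the table contents are invented.

```python
import pandas as pd
from neurostats_API.utils import StatsProcessor  # import path assumed from the fetchers above

# Toy (time, metric) table like the one ProfitLoseFetcher.process_data builds.
columns = pd.MultiIndex.from_product([["2024Q3", "2023Q3"],
                                      ["value", "percentage"]])
total_table = pd.DataFrame([[100, 60.0, 90, 58.0],
                            [40, 24.0, 35, 22.5]],
                           index=["資產總額", "流動資產合計"],
                           columns=columns)

# Slice one row in 'value_and_percentage' mode, as the balance-sheet settings do.
sliced = StatsProcessor.slice_multi_col_table(total_table=total_table,
                                              mode="value_and_percentage",
                                              target_index="資產總額")
print(sliced.columns.tolist())
# ['2024Q3_value', '2024Q3_percentage', '2023Q3_value', '2023Q3_percentage']
```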