neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. neurostats_API/__init__.py +1 -1
  2. neurostats_API/fetchers/balance_sheet.py +152 -102
  3. neurostats_API/fetchers/base.py +93 -74
  4. neurostats_API/fetchers/cash_flow.py +143 -113
  5. neurostats_API/fetchers/finance_overview.py +28 -28
  6. neurostats_API/fetchers/institution.py +211 -97
  7. neurostats_API/fetchers/margin_trading.py +121 -94
  8. neurostats_API/fetchers/month_revenue.py +139 -105
  9. neurostats_API/fetchers/profit_lose.py +203 -108
  10. neurostats_API/fetchers/tech.py +117 -42
  11. neurostats_API/fetchers/tej_finance_report.py +248 -338
  12. neurostats_API/fetchers/value_invest.py +32 -12
  13. neurostats_API/tools/company_list/tw.json +2175 -0
  14. neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -3
  15. neurostats_API/tools/tej_db/tej_db_skip_index.yaml +14 -1
  16. neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -5
  17. neurostats_API/utils/__init__.py +0 -1
  18. neurostats_API/utils/calculate_value.py +102 -1
  19. neurostats_API/utils/data_process.py +53 -19
  20. neurostats_API/utils/logger.py +21 -0
  21. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/METADATA +2 -2
  22. neurostats_API-0.0.23.dist-info/RECORD +35 -0
  23. neurostats_API/utils/fetcher.py +0 -1056
  24. neurostats_API-0.0.21b0.dist-info/RECORD +0 -34
  25. /neurostats_API/tools/{balance_sheet.yaml → twse/balance_sheet.yaml} +0 -0
  26. /neurostats_API/tools/{cash_flow_percentage.yaml → twse/cash_flow_percentage.yaml} +0 -0
  27. /neurostats_API/tools/{finance_overview_dict.yaml → twse/finance_overview_dict.yaml} +0 -0
  28. /neurostats_API/tools/{profit_lose.yaml → twse/profit_lose.yaml} +0 -0
  29. /neurostats_API/tools/{seasonal_data_field_dict.txt → twse/seasonal_data_field_dict.txt} +0 -0
  30. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/WHEEL +0 -0
  31. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,10 @@ import importlib.resources as pkg_resources
3
3
  import json
4
4
  import numpy as np
5
5
  import pandas as pd
6
- from ..utils import StatsDateTime, StatsProcessor
6
+ from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
7
7
  import yaml
8
8
 
9
9
 
10
-
11
10
  class ProfitLoseFetcher(StatsFetcher):
12
11
  """
13
12
  iFa.ai: 財務分析 -> 損益表
@@ -16,143 +15,239 @@ class ProfitLoseFetcher(StatsFetcher):
16
15
  def __init__(self, ticker, db_client):
17
16
  super().__init__(ticker, db_client)
18
17
 
19
- self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")
18
+ self.table_settings = StatsProcessor.load_yaml("twse/profit_lose.yaml")
19
+
20
+ self.process_function_map = {
21
+ "twse_stats": self.process_data_twse,
22
+ "us_stats": self.process_data_us
23
+ }
20
24
 
21
- def prepare_query(self, target_season):
25
+ self.return_keys = [
26
+ 'profit_lose', 'grand_total_profit_lose', 'revenue', 'grand_total_revenue',
27
+ 'gross_profit', 'grand_total_gross_profit', 'gross_profit_percentage',
28
+ 'grand_total_gross_profit_percentage', 'operating_income', 'grand_total_operating_income', 'operating_income_percentage',
29
+ 'grand_total_operating_income_percentage', 'net_income_before_tax', 'grand_total_net_income_before_tax', 'net_income_before_tax_percentage',
30
+ 'grand_total_net_income_before_tax_percentage', 'net_income', 'grand_total_net_income', 'net_income_percentage',
31
+ 'grand_total_income_percentage', 'EPS', 'EPS_growth', 'grand_total_EPS',
32
+ 'grand_total_EPS_growth', 'profit_lose_all', 'profit_lose_YoY'
33
+ ]
34
+
35
+ def prepare_query(self):
22
36
  pipeline = super().prepare_query()
23
37
 
24
- pipeline.append({
38
+ name_map = {"twse_stats": "profit_lose", "us_stats": "income_statement"}
39
+
40
+ chart_name = name_map.get(self.collection_name, "income_statement")
41
+
42
+ append_pipeline = [
43
+ {
25
44
  "$project": {
26
45
  "_id": 0,
27
46
  "ticker": 1,
28
47
  "company_name": 1,
29
- "profit_loses": {
30
- "$sortArray": {
31
- "input": {
32
- "$map": {
33
- "input": {
34
- "$filter": {
35
- "input": "$seasonal_data",
36
- "as": "season",
37
- "cond": {
38
- "$eq":
39
- ["$$season.season", target_season]
40
- }
41
- }
42
- },
43
- "as": "target_season_data",
44
- "in": {
45
- "year":
46
- "$$target_season_data.year",
47
- "season":
48
- "$$target_season_data.season",
49
- "profit_lose":
50
- "$$target_season_data.profit_lose"
51
- }
52
- }
53
- },
54
- "sortBy": {
55
- "year": -1
56
- } # 按 year 降序排序
48
+ "seasonal_data": {
49
+ "$map": {
50
+ "input": {"$ifNull": ["$seasonal_data", []]},
51
+ "as": "season",
52
+ "in": {
53
+ "year": "$$season.year",
54
+ "season": "$$season.season",
55
+ "data": {"$ifNull": [f"$$season.{chart_name}", []]}
56
+ }
57
57
  }
58
58
  }
59
59
  }
60
- })
61
-
62
- return pipeline
60
+ }
61
+ ]
63
62
 
64
- def collect_data(self, target_season):
65
- pipeline = self.prepare_query(target_season)
63
+ pipeline = pipeline + append_pipeline
66
64
 
67
- fetched_data = self.collection.aggregate(pipeline)
65
+ return pipeline
68
66
 
69
- return list(fetched_data)[-1]
67
+ def collect_data(self):
68
+ return super().collect_data()
70
69
 
71
70
  def query_data(self):
72
- try:
73
- latest_time = StatsDateTime.get_latest_time(
74
- self.ticker, self.collection)['last_update_time']
75
- target_season = latest_time['seasonal_data']['latest_season']
76
- except Exception as e:
77
- today = StatsDateTime.get_today()
78
-
79
- target_season = today.season
80
- target_season = target_season - 1 if target_season > 1 else 4
71
+ fetched_data = self.collect_data()
81
72
 
82
- fetched_data = self.collect_data(target_season)
73
+ fetched_data = fetched_data[0]
83
74
 
84
- return self.process_data(fetched_data, target_season)
75
+ process_fn = self.process_function_map.get(
76
+ self.collection_name, self.process_data_us
77
+ )
78
+ return process_fn(fetched_data)
85
79
 
86
- def process_data(self, fetched_data, target_season):
80
+ def process_data_twse(self, fetched_data):
87
81
 
88
- profit_loses = fetched_data['profit_loses']
82
+ latest_time = StatsDateTime.get_latest_time(
83
+ self.ticker, self.collection
84
+ ).get('last_update_time', {})
89
85
 
90
- index_names = []
91
-
92
- table_dict = dict()
93
- grand_total_dict = dict()
86
+ # 取最新時間資料時間,沒取到就預設去年年底
87
+ target_year = latest_time.get('seasonal_data', {}).get(
88
+ 'latest_target_year',
89
+ StatsDateTime.get_today().year - 1
90
+ )
91
+ target_season = latest_time.get('seasonal_data',
92
+ {}).get('latest_season', 4)
94
93
 
95
94
  return_dict = {
96
- "ticker": fetched_data['ticker'],
95
+ "ticker": self.ticker,
97
96
  "company_name": fetched_data['company_name'],
98
97
  }
99
98
 
100
- for data in profit_loses:
101
- year = data['year']
102
-
103
- time_index = f"{year}Q{target_season}"
104
-
105
- # 蒐集整體的keys
106
- index_names += list(data['profit_lose'].keys())
107
- profit_lose = data['profit_lose']
108
-
109
- for index_name, value_dict in profit_lose.items():
110
- # (2020Q1, 項目, 金額或%)
111
- for item_name, item in value_dict.items():
112
- if ('percentage' in item_name):
113
- if (isinstance(item, (float, int))):
114
- item = StatsProcessor.cal_non_percentage(item, to_str=True, postfix="%")
115
- elif ('YoY' in item_name):
116
- if (isinstance(item, (float, int))):
117
- item = StatsProcessor.cal_percentage(item)
118
- elif ('每股盈餘' in index_name):
119
- if (isinstance(item, (float, int))):
120
- item = StatsProcessor.cal_non_percentage(item, postfix="元")
121
- else:
122
- if (isinstance(item, (float, int))):
123
- item = StatsProcessor.cal_non_percentage(item, postfix="千元")
124
- try:
125
- table_dict[index_name][(time_index, item_name)] = item
126
-
127
- except KeyError:
128
- if (index_name not in table_dict.keys()):
129
- table_dict[index_name] = dict()
130
- grand_total_dict[index_name] = dict()
131
-
132
- table_dict[index_name][(time_index, item_name)] = item
133
-
134
- total_table = pd.DataFrame.from_dict(table_dict, orient='index')
135
- total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)
136
-
137
- total_table = total_table.replace("N/A", None)
99
+ seasonal_data = fetched_data.get('seasonal_data', [])
100
+
101
+ if (not seasonal_data):
102
+ return_dict.update(self._get_empty_structure())
103
+ return return_dict
104
+
105
+ profit_lose_dict = {
106
+ f"{data['year']}Q{data['season']}": data['data']
107
+ for data in seasonal_data
108
+ }
109
+
110
+ profit_lose_dict = YoY_Calculator.cal_QoQ(profit_lose_dict)
111
+ profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict)
112
+ target_season_col = profit_lose_df.columns.str.endswith(
113
+ f"Q{target_season}"
114
+ )
115
+ profit_lose_df = profit_lose_df.loc[:, target_season_col]
116
+
117
+ old_profit_lose_df = StatsProcessor.expand_value_percentage(
118
+ profit_lose_df
119
+ )
120
+ # OLD: 回傳包含value & percentage
121
+ value_col = old_profit_lose_df.columns.str.endswith(f"_value")
122
+ percentage_col = old_profit_lose_df.columns.str.endswith(f"_percentage")
123
+ # OLD: 回傳剔除grand_total
124
+ grand_total_value_col = old_profit_lose_df.columns.str.endswith(
125
+ f"grand_total_value"
126
+ )
127
+ grand_total_percentage_col = old_profit_lose_df.columns.str.endswith(
128
+ f"grand_total_percentage"
129
+ )
130
+
131
+ old_profit_lose_df = old_profit_lose_df.loc[:, (
132
+ (value_col & ~grand_total_value_col) |
133
+ (percentage_col & ~grand_total_percentage_col)
134
+ )]
135
+
136
+ for time_index, data_dict in profit_lose_dict.items():
137
+ profit_lose_dict[time_index] = self.flatten_dict(
138
+ value_dict=data_dict,
139
+ indexes=list(data_dict.keys()),
140
+ target_keys=[
141
+ "value", "growth", "percentage", "grand_total",
142
+ "grand_total_percentage"
143
+ ] + [f"YoY_{i}" for i in [1, 3, 5, 10]] +
144
+ [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
145
+ )
146
+
147
+ profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict).T
148
+ # EPS的value用元計算
149
+ eps_index = (
150
+ profit_lose_df.columns.str.endswith("_value")
151
+ & profit_lose_df.columns.str.contains("每股盈餘")
152
+ )
153
+ eps_copy = profit_lose_df.loc[:, eps_index].copy()
154
+ eps_mask_index = eps_copy.columns
155
+ profit_lose_df[eps_mask_index] = profit_lose_df[eps_mask_index].map(
156
+ lambda x: StatsProcessor.cal_non_percentage(x, postfix="元")
157
+ )
158
+
159
+ # percentage處理
160
+ percentage_index = profit_lose_df.columns.str.endswith("percentage")
161
+ growth_index = profit_lose_df.columns.str.endswith("growth")
162
+ percentage_mask = (percentage_index | growth_index)
163
+ percentage_copy = profit_lose_df.loc[:, percentage_mask]
164
+ percentage_mask_index = percentage_copy.columns
165
+
166
+ profit_lose_df[percentage_mask_index] = profit_lose_df[
167
+ percentage_mask_index].map(
168
+ lambda x: StatsProcessor.
169
+ cal_non_percentage(x, to_str=True, postfix="%")
170
+ )
171
+
172
+ # YoY處理: 乘以100
173
+ YoY_index = profit_lose_df.columns.str.contains("YoY")
174
+ YoY_mask = YoY_index
175
+ YoY_copy = profit_lose_df.loc[:, YoY_mask]
176
+ YoY_mask_cols = YoY_copy.columns
177
+
178
+ profit_lose_df[YoY_mask_cols] = profit_lose_df[YoY_mask_cols].map(
179
+ lambda x: StatsProcessor.cal_percentage(x)
180
+ )
181
+
182
+ # 剩下的處理: 乘以千元
183
+ value_index = ~(
184
+ percentage_index | growth_index | YoY_index | eps_index
185
+ ) # 除了上述以外的 index
186
+
187
+ value_col = profit_lose_df.loc[:, value_index].columns
188
+ profit_lose_df[value_col] = profit_lose_df[value_col].map(
189
+ lambda x: StatsProcessor.cal_non_percentage(x, postfix="千元")
190
+ )
191
+
192
+ total_table = profit_lose_df.replace("N/A", None).T
193
+
194
+ # 取特定季度
195
+ target_season_columns = total_table.columns.str.endswith(
196
+ f"Q{target_season}"
197
+ )
198
+ total_table_YoY = total_table.loc[:, target_season_columns]
138
199
 
139
200
  for name, setting in self.table_settings.items():
140
- if ('target_index' in setting.keys()):
141
- target_indexes = [target.strip() for target in setting['target_index']]
142
- else:
143
- target_indexes = [None]
144
-
201
+ target_indexes = setting.get('target_index', [None])
145
202
  for target_index in target_indexes:
146
203
  try:
147
- return_dict[name] = StatsProcessor.slice_multi_col_table(
148
- total_table=total_table,
204
+ return_dict[name] = StatsProcessor.slice_table(
205
+ total_table=total_table_YoY,
149
206
  mode=setting['mode'],
150
- target_index=target_index)
207
+ target_index=target_index
208
+ )
151
209
  break
152
210
  except Exception as e:
153
- return_dict[name] = StatsProcessor.slice_multi_col_table(
154
- total_table=total_table,
155
- mode=setting['mode'],
156
- target_index=target_index)
211
+ continue
212
+
213
+ return_dict.update(
214
+ {
215
+ "profit_lose": old_profit_lose_df,
216
+ "profit_lose_all": total_table.copy(),
217
+ "profit_lose_YoY": total_table_YoY
218
+ }
219
+ )
220
+ return return_dict
221
+
222
+ def process_data_us(self, fetched_data):
223
+
224
+ table_dict = {
225
+ f"{data['year']}Q{data['season']}": data['profit_lose']
226
+ for data in fetched_data
227
+ }
228
+
229
+ table_dict = YoY_Calculator.cal_QoQ(table_dict)
230
+ table_dict = YoY_Calculator.cal_YoY(table_dict)
231
+
232
+ for time_index, data_dict in table_dict.items():
233
+ table_dict[time_index] = self.flatten_dict(
234
+ value_dict=data_dict,
235
+ indexes=list(data_dict.keys()),
236
+ target_keys=["value", "growth"] +
237
+ [f"YoY_{i}" for i in [1, 3, 5, 10]]
238
+ )
239
+
240
+ # 計算QoQ
241
+
242
+ return_dict = {
243
+ "ticker": self.ticker,
244
+ "company_name": fetched_data[-1]['company_name'],
245
+ "profit_lose": pd.DataFrame.from_dict(table_dict)
246
+ }
157
247
 
158
248
  return return_dict
249
+
250
+ def _get_empty_structure(self):
251
+ return {
252
+ key: pd.DataFrame(columns= pd.Index([], name = 'date')) for key in self.return_keys
253
+ }
@@ -12,7 +12,10 @@ class TechFetcher(StatsFetcher):
12
12
  """
13
13
 
14
14
  super().__init__(ticker, db_client)
15
- self.collection = self.db["TWN/APIPRCD"]
15
+ if (ticker in self.tw_company_list.keys()):
16
+ self.twse_collection = self.db['twse_stats']
17
+ self.tej_collection = self.db["TWN/APIPRCD"]
18
+
16
19
  self.full_ohlcv = self._get_ohlcv()
17
20
  self.basic_indexes = [
18
21
  'SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20', 'EMA40', 'EMA12',
@@ -51,53 +54,47 @@ class TechFetcher(StatsFetcher):
51
54
 
52
55
  required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
53
56
 
54
- try:
55
- # 先對yf search
56
- if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
57
- full_tick = f'^{self.ticker}'
58
- else:
59
- full_tick = f'{self.ticker}.tw'
60
-
57
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
58
+ full_tick = f'^{self.ticker}'
61
59
  df = self.conduct_yf_search(full_tick)
62
60
 
63
- if not self.has_required_columns(df, required_cols):
64
-
65
- print(f".tw failed, try .two")
66
-
67
- full_tick = f'{self.ticker}.two'
68
-
69
- df = self.conduct_yf_search(full_tick)
61
+ return df[required_cols]
70
62
 
71
- if (df.empty):
72
- raise ValueError(f"No data found for ticker: {self.ticker}")
73
-
74
- except (KeyError, ValueError, TypeError) as e:
75
- # 再對TEJ search
76
- tej_required_cols = [
77
- "mdate", "open_d", 'high_d', 'low_d', 'close_d', 'vol'
63
+ elif(self.ticker in self.tw_company_list.keys()):
64
+ search_fns = [
65
+ self.conduct_db_search_twse,
66
+ self.conduct_db_search_tej,
67
+ lambda: self.conduct_yf_search(f'{self.ticker}.tw'),
68
+ lambda: self.conduct_yf_search(f'{self.ticker}.two')
78
69
  ]
79
- tej_name_proj = {
80
- tej_name: org_name
81
- for tej_name, org_name in zip(tej_required_cols, required_cols)
82
- }
83
-
84
- query = {'ticker': self.ticker}
85
- ticker_full = self.collection.find_one(query)
86
-
87
- if not ticker_full:
88
- raise ValueError("No ticker found in database")
89
-
90
- daily_data = ticker_full.get("data", [])
91
- if not isinstance(daily_data, list):
92
- raise TypeError("Expected 'daily_data' to be a list.")
93
-
94
- df = pd.DataFrame(daily_data)
95
70
 
96
- if not self.has_required_columns(df, tej_required_cols):
97
- raise KeyError(f"Missing required columns")
98
- df = df.rename(columns=tej_name_proj)
71
+ for search_method in search_fns:
72
+ try:
73
+ df = search_method()
74
+ break
75
+ except (KeyError, ValueError, TypeError):
76
+ continue
77
+ else:
78
+ return pd.DataFrame(columns=required_cols)
79
+
80
+ # break跳出後
81
+ return df[required_cols]
82
+ else: # 美股
83
+ search_fns = [
84
+ self.conduct_db_search_us,
85
+ lambda : self.conduct_yf_search(f"{self.ticker}")
86
+ ]
87
+ for search_method in search_fns:
88
+ try:
89
+ df = search_method()
90
+ break
91
+ except (KeyError, ValueError, TypeError):
92
+ continue
93
+ else:
94
+ df = pd.DataFrame()
99
95
 
100
- return df[required_cols]
96
+ return df
97
+
101
98
 
102
99
  def get_daily(self):
103
100
 
@@ -141,7 +138,85 @@ class TechFetcher(StatsFetcher):
141
138
  )
142
139
 
143
140
  return df
141
+
142
+ def conduct_db_search_tej(self):
143
+ # 再對TEJ search
144
+ tej_required_cols = [
145
+ "mdate", "open_d", 'high_d', 'low_d', 'close_d', 'vol'
146
+ ]
147
+
148
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
149
+ tej_name_proj = {
150
+ tej_name: org_name
151
+ for tej_name, org_name in zip(tej_required_cols, required_cols)
152
+ }
153
+
154
+ query = {'ticker': self.ticker}
155
+ ticker_full = self.collection.find_one(query)
156
+
157
+ if not ticker_full:
158
+ raise ValueError("No ticker found in database")
144
159
 
160
+ daily_data = ticker_full.get("data", [])
161
+ if not isinstance(daily_data, list):
162
+ raise TypeError("Expected 'daily_data' to be a list.")
163
+
164
+ df = pd.DataFrame(daily_data)
165
+
166
+ if not self.has_required_columns(df, tej_required_cols):
167
+ raise KeyError(f"Missing required columns")
168
+ df = df.rename(columns=tej_name_proj)
169
+
170
+ return df[required_cols]
171
+
172
+ def conduct_db_search_us(self):
173
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
174
+
175
+ query = {'ticker': self.ticker}
176
+ filter_query = {"daily_data" : 1, "_id": 0}
177
+ ticker_full = self.collection.find_one(query, filter_query)
178
+
179
+ if not ticker_full:
180
+ raise ValueError("No ticker found in database")
181
+
182
+ daily_data = ticker_full.get("daily_data", [])
183
+ if not isinstance(daily_data, list):
184
+ raise TypeError("Expected 'daily_data' to be a list.")
185
+
186
+ df = pd.DataFrame(daily_data)
187
+
188
+ if not self.has_required_columns(df, required_cols):
189
+ missing_cols = [col for col in required_cols if col not in df.columns]
190
+ missing_cols = ",".join(missing_cols)
191
+ for col in missing_cols:
192
+ df[col] = pd.NA
193
+
194
+ return df[required_cols]
195
+
196
+
197
+ def conduct_db_search_twse(self):
198
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
199
+ match_query = {"ticker" : self.ticker}
200
+ proj_query = {"_id": 0, "daily_data": 1}
201
+
202
+ full_data = self.twse_collection.find_one(match_query, proj_query)
203
+
204
+ if (not full_data):
205
+ raise ValueError("No ticker found in database twse_stats")
206
+
207
+ daily_data = full_data.get("daily_data", [])
208
+
209
+ if (not isinstance(daily_data, list)):
210
+ raise ValueError("No ticker found in database twse_stats")
211
+
212
+ df = pd.DataFrame(daily_data)
213
+ if not self.has_required_columns(df, required_cols):
214
+ raise KeyError(f"Missing required columns")
215
+
216
+ df = df[required_cols]
217
+ df = df.sort_values(by = 'date').drop_duplicates(subset=['date'])
218
+
219
+ return df
145
220
 
146
221
  class TechProcessor:
147
222