neurostats-API 0.0.21__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. neurostats_api-0.0.22/MANIFEST.in +9 -0
  2. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/PKG-INFO +3 -3
  3. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/README.md +1 -1
  4. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/__init__.py +1 -1
  5. neurostats_api-0.0.22/neurostats_API/fetchers/balance_sheet.py +178 -0
  6. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/base.py +89 -74
  7. neurostats_api-0.0.22/neurostats_API/fetchers/cash_flow.py +200 -0
  8. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/finance_overview.py +2 -2
  9. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/month_revenue.py +1 -1
  10. neurostats_api-0.0.22/neurostats_API/fetchers/profit_lose.py +233 -0
  11. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/tech.py +73 -33
  12. neurostats_api-0.0.22/neurostats_API/fetchers/tej_finance_report.py +361 -0
  13. neurostats_api-0.0.22/neurostats_API/tools/company_list/tw.json +2175 -0
  14. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_skip_index.yaml +3 -1
  15. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -1
  16. neurostats_api-0.0.22/neurostats_API/tools/twse/balance_sheet.yaml +35 -0
  17. neurostats_api-0.0.22/neurostats_API/tools/twse/cash_flow_percentage.yaml +39 -0
  18. neurostats_api-0.0.22/neurostats_API/tools/twse/finance_overview_dict.yaml +185 -0
  19. neurostats_api-0.0.22/neurostats_API/tools/twse/profit_lose.yaml +143 -0
  20. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/__init__.py +0 -1
  21. neurostats_api-0.0.22/neurostats_API/utils/calculate_value.py +124 -0
  22. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/data_process.py +43 -15
  23. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/PKG-INFO +3 -3
  24. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/SOURCES.txt +6 -2
  25. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/requires.txt +1 -1
  26. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/setup.py +3 -4
  27. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/test/test_fetchers.py +45 -22
  28. neurostats_api-0.0.21/MANIFEST.in +0 -2
  29. neurostats_api-0.0.21/neurostats_API/fetchers/balance_sheet.py +0 -151
  30. neurostats_api-0.0.21/neurostats_API/fetchers/cash_flow.py +0 -191
  31. neurostats_api-0.0.21/neurostats_API/fetchers/profit_lose.py +0 -158
  32. neurostats_api-0.0.21/neurostats_API/fetchers/tej_finance_report.py +0 -466
  33. neurostats_api-0.0.21/neurostats_API/utils/calculate_value.py +0 -26
  34. neurostats_api-0.0.21/neurostats_API/utils/fetcher.py +0 -1056
  35. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/cli.py +0 -0
  36. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/__init__.py +0 -0
  37. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/institution.py +0 -0
  38. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/margin_trading.py +0 -0
  39. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/value_invest.py +0 -0
  40. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/main.py +0 -0
  41. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_index.yaml +0 -0
  42. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -0
  43. {neurostats_api-0.0.21/neurostats_API/tools → neurostats_api-0.0.22/neurostats_API/tools/twse}/seasonal_data_field_dict.txt +0 -0
  44. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/datetime.py +0 -0
  45. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/utils/db_client.py +0 -0
  46. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/dependency_links.txt +0 -0
  47. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API.egg-info/top_level.txt +0 -0
  48. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/setup.cfg +0 -0
  49. {neurostats_api-0.0.21 → neurostats_api-0.0.22}/test/test_tej.py +0 -0
@@ -0,0 +1,9 @@
1
+ recursive-include neurostats_api/tools/tej_db *.yaml
2
+ recursive-include neurostats_api/tools/tej_db *.txt
3
+ recursive-include neurostats_api/tools/tej_db *.json
4
+ recursive-include neurostats_api/tools/twse *.yaml
5
+ recursive-include neurostats_api/tools/twse *.txt
6
+ recursive-include neurostats_api/tools/twse *.json
7
+ recursive-include neurostats_api/tools/company_list *.yaml
8
+ recursive-include neurostats_api/tools/company_list *.txt
9
+ recursive-include neurostats_api/tools/company_list *.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neurostats_API
3
- Version: 0.0.21
3
+ Version: 0.0.22
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
@@ -8,7 +8,7 @@ Author-email: jason@neurowatt.ai
8
8
  Requires-Python: >=3.6
9
9
  Description-Content-Type: text/markdown
10
10
  Requires-Dist: numpy
11
- Requires-Dist: pandas>=2.2.0
11
+ Requires-Dist: pandas
12
12
  Requires-Dist: pymongo
13
13
  Requires-Dist: pytz
14
14
  Requires-Dist: python-dotenv
@@ -89,7 +89,7 @@ pip install neurostats-API
89
89
  ```Python
90
90
  >>> import neurostats_API
91
91
  >>> print(neurostats_API.__version__)
92
- 0.0.21
92
+ 0.0.22
93
93
  ```
94
94
 
95
95
  ### 得到最新一期的評價資料與歷年評價
@@ -73,7 +73,7 @@ pip install neurostats-API
73
73
  ```Python
74
74
  >>> import neurostats_API
75
75
  >>> print(neurostats_API.__version__)
76
- 0.0.21
76
+ 0.0.22
77
77
  ```
78
78
 
79
79
  ### 得到最新一期的評價資料與歷年評價
@@ -1,4 +1,4 @@
1
- __version__='0.0.20'
1
+ __version__='0.0.22'
2
2
 
3
3
  from .fetchers import (
4
4
  BalanceSheetFetcher,
@@ -0,0 +1,178 @@
1
+ from .base import StatsFetcher, StatsDateTime
2
+ import json
3
+ import pandas as pd
4
+ from ..utils import StatsDateTime, StatsProcessor
5
+ import yaml
6
+
7
+
8
class BalanceSheetFetcher(StatsFetcher):
    """
    Balance-sheet fetcher (iFa.ai -> Financial Analysis -> Balance Sheet).

    The base class selects the database/collection for the ticker; this
    class extends the aggregation pipeline with the seasonal balance-sheet
    projection and turns the fetched documents into pandas tables.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)
        self.table_settings = StatsProcessor.load_yaml("twse/balance_sheet.yaml")

        # Post-processing is dispatched on the collection chosen by the
        # base class.
        self.process_function_map = {
            "twse_stats": self.process_data_twse,
            "us_stats": self.process_data_us
        }

    def prepare_query(self):
        """Return the base pipeline extended with the balance-sheet stages."""
        pipeline = super().prepare_query()

        pipeline = pipeline + [
            {
                "$unwind": "$seasonal_data"  # expand the seasonal_data array
            },
            {
                "$project": {
                    "_id": 0,
                    "ticker": 1,
                    "company_name": 1,
                    "year": "$seasonal_data.year",
                    "season": "$seasonal_data.season",
                    "balance_sheet": {
                        "$ifNull": ["$seasonal_data.balance_sheet", []]
                    }  # avoid null
                }
            },
            {
                # Newest period first.
                "$sort": {
                    "year": -1,
                    "season": -1
                }
            }
        ]

        return pipeline

    def collect_data(self):
        """Run the pipeline; see the base class for the return contract."""
        return super().collect_data()

    def query_data(self):
        """Fetch the documents and process them for the ticker's market."""
        fetched_data = self.collect_data()

        process_fn = self.process_function_map[self.collection_name]
        processed_data = process_fn(fetched_data)
        return processed_data

    def process_data_twse(self, fetched_data):
        """
        Build the balance-sheet tables for a ``twse_stats`` ticker.

        Args:
            fetched_data: documents produced by ``prepare_query`` (each has
                ``year``, ``season``, ``balance_sheet``, ``company_name``).

        Returns:
            dict with ``ticker``, ``company_name``, one entry per table in
            ``self.table_settings`` that could be sliced, plus
            ``balance_sheet`` (target-season columns, expanded),
            ``balance_sheet_all`` (flattened, every period) and
            ``balance_sheet_YoY`` (flattened, target-season columns only).
        """
        latest_time = StatsDateTime.get_latest_time(
            self.ticker, self.collection
        ).get('last_update_time', {})
        # Season of the latest available data; falls back to Q4 when the
        # update record does not carry it.
        target_season = latest_time.get('seasonal_data',
                                        {}).get('latest_season', 4)

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name']
        }

        # Raw nested tables keyed by "<year>Q<season>":
        # {<key>: {"value": <value>, "percentage": <value>}}
        table_dict = {
            f"{data['year']}Q{data['season']}": data['balance_sheet']
            for data in fetched_data
        }

        old_balance_sheet = pd.DataFrame(table_dict)
        target_season_col = old_balance_sheet.columns.str.endswith(
            f"Q{target_season}"
        )
        old_balance_sheet = old_balance_sheet.loc[:, target_season_col]
        old_balance_sheet = StatsProcessor.expand_value_percentage(
            old_balance_sheet
        )

        # Flatten every period on its OWN data:
        # {<key>: {"value": v, "percentage": p}}
        #   -> {<key>_value: v, <key>_percentage: p}
        # (Previously the last-fetched period was flattened for every
        # column, so all periods showed identical numbers.)
        flattened = {
            time_index: self.flatten_dict(
                data_dict,
                indexes=list(data_dict.keys()),
                target_keys=["value", "percentage"]
            )
            for time_index, data_dict in table_dict.items()
        }

        total_table = pd.DataFrame.from_dict(flattened)
        value_index = total_table.index.str.endswith("_value")
        total_table.loc[value_index, :] = (
            total_table.loc[value_index, :].apply(
                lambda x: StatsProcessor.cal_non_percentage(x, postfix="元"),
                axis=0
            )
        )

        percentage_index = total_table.index.str.endswith("_percentage")
        total_table.loc[percentage_index, :] = (
            total_table.loc[percentage_index, :].apply(
                lambda x: StatsProcessor.cal_non_percentage(
                    x, to_str=True, postfix="%"
                ),
                axis=0
            )
        )

        target_season_columns = total_table.columns.str.endswith(
            f"Q{target_season}"
        )
        total_table_YoY = total_table.loc[:, target_season_columns]

        for name, setting in self.table_settings.items():
            target_indexes = setting.get('target_index', [None])
            # Try each candidate index in order; keep the first that slices.
            for target_index in target_indexes:
                try:
                    return_dict[name] = StatsProcessor.slice_old_table(
                        total_table=old_balance_sheet,
                        target_index=target_index
                    )
                    break
                except Exception as e:
                    print(f"error : {str(e)}")
                    continue

        return_dict.update(
            {
                "balance_sheet": old_balance_sheet,
                "balance_sheet_all": total_table.copy(),
                "balance_sheet_YoY": total_table_YoY
            }
        )
        return return_dict

    def process_data_us(self, fetched_data):
        """
        Build the balance-sheet tables for a ``us_stats`` ticker.

        Returns:
            dict with ``ticker``, ``company_name``, ``balance_sheet`` (one
            column per period) and ``balance_sheet_YoY`` (only the columns
            sharing the season of the first fetched document).
        """
        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name']
        }

        table_dict = dict()

        for data in fetched_data:
            year, season, balance_sheet = data['year'], data['season'], data[
                'balance_sheet']
            table_dict[f"{year}Q{season}"] = balance_sheet

        table_dict = pd.DataFrame.from_dict(table_dict)

        return_dict["balance_sheet"] = table_dict

        # The pipeline sorts newest first, so element 0 is the latest period.
        latest_season = fetched_data[0]['season']
        target_season_columns = table_dict.columns.str.endswith(
            f"Q{latest_season}"
        )
        table_dict_YoY = table_dict.loc[:, target_season_columns]
        return_dict["balance_sheet_YoY"] = table_dict_YoY
        return return_dict
@@ -1,22 +1,31 @@
1
1
  import abc
2
- from typing import Union
3
- from pymongo import MongoClient
4
- import pandas as pd
2
+ from datetime import datetime, timedelta, date
5
3
  import json
4
+ import pandas as pd
5
+ from pymongo import MongoClient
6
6
  import pytz
7
- from datetime import datetime, timedelta, date
7
+ from typing import Union
8
8
  from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
9
- import yaml
10
9
 
11
10
 
12
- class StatsFetcher:
11
+ class StatsFetcher(abc.ABC):
13
12
 
14
- def __init__(self, ticker, db_client):
13
+ def __init__(self, ticker: str, db_client: MongoClient):
15
14
  self.ticker = ticker
16
- self.db = db_client["company"] # Replace with your database name
17
- self.collection = self.db["twse_stats"]
18
-
19
15
  self.timezone = pytz.timezone("Asia/Taipei")
16
+ self.tw_company_list = StatsProcessor.load_json("company_list/tw.json")
17
+ db_mapping = {
18
+ "company": "twse_stats",
19
+ "company_us": "us_stats",
20
+ }
21
+
22
+ name_mapping = {"company": "台股", "company_us": "美股"}
23
+
24
+ db_name = "company" if self.ticker in self.tw_company_list else "company_us"
25
+ self.db = db_client[db_name]
26
+ self.collection_name = db_mapping.get(db_name, "unknown")
27
+ assert self.collection_name != "unknown", f"請確認 {ticker} 是否是 {','.join(list(name_mapping.values()))}"
28
+ self.collection = db_client[db_name][self.collection_name]
20
29
 
21
30
  self.target_metric_dict = {
22
31
  'value': ['value'],
@@ -37,40 +46,41 @@ class StatsFetcher:
37
46
  }
38
47
  },
39
48
  ]
49
+
50
def query_data(self):
    """
    Fetch and process the data for this fetcher's ticker.

    The base fetcher has no processing logic of its own; concrete
    fetchers provide their own implementation.

    Raises:
        NotImplementedError: always, when called on the base class.
    """
    # Raise instead of return: a returned exception instance is a truthy
    # object that callers would mistake for a valid result.
    raise NotImplementedError(
        "query_data() must be implemented by a concrete fetcher"
    )
40
52
 
41
- def collect_data(self, start_date, end_date):
53
def collect_data(self):
    """
    Run the aggregation pipeline built by ``prepare_query``.

    Returns:
        The aggregation results as a list, or ``None`` when the pipeline
        produced no documents.
    """
    query = self.prepare_query()
    documents = list(self.collection.aggregate(query))
    if not documents:
        return None
    return documents
45
57
 
46
- return fetched_data[0]
47
-
48
- def str_to_datetime(self, date_str):
49
- year, month, day = [int(num) for num in date_str.split("-")]
50
-
51
- date = datetime.strptime(date_str, "%Y-%m-%d")
52
- date = self.timezone.localize(date)
53
-
58
def str_to_datetime(self, date_str: str) -> StatsDateTime:
    """
    Parse a ``YYYY-MM-DD`` string into a ``StatsDateTime``.

    The parsed date is localized with ``self.timezone``; the season is the
    calendar quarter (1-4) derived from the month.
    """
    parsed = datetime.strptime(date_str, "%Y-%m-%d")
    localized = self.timezone.localize(parsed)
    quarter = (localized.month - 1) // 3 + 1
    return StatsDateTime(
        localized, localized.year, localized.month, localized.day, quarter
    )
57
63
 
58
- def has_required_columns(self, df: pd.DataFrame, required_cols=None):
59
- """
60
- Check if the required columns are present in the DataFrame.
61
-
62
- Args:
63
- df (pd.DataFrame): The DataFrame to check.
64
- required_cols (list, optional): List of required column names.
65
- Defaults to ['date', 'open', 'high', 'low', 'close', 'volume'].
66
-
67
- Returns:
68
- bool: True if all required columns are present, False otherwise.
69
- """
64
def has_required_columns(
    self, df: pd.DataFrame, required_cols=None
) -> bool:
    """
    Check whether every required column is present in ``df``.

    Args:
        df: the DataFrame to check.
        required_cols: column names to look for; when ``None`` the check
            uses ['date', 'open', 'high', 'low', 'close', 'volume'].

    Returns:
        True if all required columns are present, False otherwise.
    """
    wanted = (
        ['date', 'open', 'high', 'low', 'close', 'volume']
        if required_cols is None else required_cols
    )
    for column in wanted:
        if column not in df.columns:
            return False
    return True
70
+
71
+ @staticmethod
72
def flatten_dict(value_dict, indexes, target_keys):
    """
    Flatten a two-level dict into ``"<key>_<sub_key>"`` entries.

    {<key>: {"value": v, "percentage": p}}
        -> {"<key>_value": v, "<key>_percentage": p}

    Args:
        value_dict: mapping of key -> inner mapping.
        indexes: accepted for interface compatibility; the keys of
            ``value_dict`` are always used instead.
        target_keys: inner keys to extract; a missing inner key yields
            ``None``.

    Returns:
        dict: the flattened mapping, in ``value_dict`` key order.
    """
    flat = {}
    for outer_key, inner in value_dict.items():
        for sub_key in target_keys:
            flat[f"{outer_key}_{sub_key}"] = inner.get(sub_key, None)
    return flat
74
84
 
75
85
 
76
86
  class BaseTEJFetcher(abc.ABC):
@@ -81,21 +91,14 @@ class BaseTEJFetcher(abc.ABC):
81
91
 
82
92
  def get_latest_data_time(self, ticker):
83
93
  latest_data = self.collection.find_one(
84
- {
85
- "ticker": ticker
86
- },
87
- {
94
+ {"ticker": ticker}, {
88
95
  "last_update": 1,
89
96
  "_id": 0
90
97
  }
91
98
  )
92
99
 
93
- try:
94
- latest_date = latest_data['last_update']["latest_data_date"]
95
- except Exception as e:
96
- latest_date = None
97
-
98
- return latest_date
100
+ # return 得到最新日期或None
101
+ return latest_data.get('last_update', {}).get("latest_data_date", None)
99
102
 
100
103
  def process_value(self, value):
101
104
  if isinstance(value, str) and "%" in value:
@@ -107,77 +110,89 @@ class BaseTEJFetcher(abc.ABC):
107
110
 
108
111
  def calculate_growth(self, this_value, last_value, delta):
109
112
  try:
110
- return YoY_Calculator.cal_growth(this_value, last_value, delta) * 100
113
+ return YoY_Calculator.cal_growth(
114
+ this_value, last_value, delta
115
+ ) * 100
111
116
  except Exception:
112
117
  return None
113
118
 
114
- def cal_YoY(self, data_dict: dict, start_year: int, end_year: int, season: int):
119
def cal_YoY(
    self, data_dict: dict, start_year: int, end_year: int, season: int
):
    """
    Attach 1/3/5/10-year growth figures to every item of a given season.

    Args:
        data_dict: mapping of ``"<year>Q<season>"`` -> {item: raw value}.
        start_year: first year to process (inclusive).
        end_year: last year to process (inclusive).
        season: the season whose periods are processed.

    Returns:
        dict keyed by ``"<year>Q<season>"``; each item becomes
        ``{"value": raw, "YoY_1": ..., "YoY_3": ..., "YoY_5": ...,
        "YoY_10": ...}`` where a growth is an ``"x.xx%"`` string or
        ``None`` when it cannot be computed. Items whose value
        ``process_value`` rejects (returns ``None``) are dropped; the
        ``"season"`` entry is kept untouched. ``data_dict`` itself is not
        modified (each period is shallow-copied first).
    """
    year_shifts = [1, 3, 5, 10]
    return_dict = {}

    for year in range(start_year, end_year + 1):
        year_data = data_dict.get(f"{year}Q{season}", {}).copy()
        if not year_data:
            continue

        # Iterate over a snapshot because entries are popped/replaced.
        for key, value in list(year_data.items()):
            if key == "season":
                continue

            this_value = self.process_value(value)
            if this_value is None:
                year_data.pop(key)
                continue

            temp_dict = {"value": value}
            for shift in year_shifts:
                past_value = self.process_value(
                    data_dict.get(f"{year - shift}Q{season}", {}).get(key)
                )
                # A missing or zero base gives no meaningful growth.
                growth = (
                    self.calculate_growth(this_value, past_value, shift)
                    if past_value else None
                )

                # Compare against None: a growth of exactly 0 is a valid
                # result and must be reported as "0.00%".
                temp_dict[f"YoY_{shift}"] = (
                    f"{growth:.2f}%" if growth is not None else None
                )

            year_data[key] = temp_dict
        return_dict[f"{year}Q{season}"] = year_data
    return return_dict
145
153
 
146
154
  def cal_QoQ(self, data_dict):
147
155
  return_dict = {}
148
-
156
+
149
157
  for time_index, this_data in data_dict.items():
150
158
  year, season = map(int, time_index.split("Q"))
151
- last_year, last_season = (year - 1, 4) if season == 1 else (year, season - 1)
152
-
159
+ last_year, last_season = (
160
+ year - 1, 4
161
+ ) if season == 1 else (year, season - 1)
162
+
153
163
  for key in list(this_data.keys()):
154
164
  if key == "season":
155
165
  continue
156
-
166
+
157
167
  this_value = self.process_value(this_data[key])
158
168
  if this_value is None:
159
169
  this_data.pop(key)
160
170
  continue
161
-
171
+
162
172
  temp_dict = {"value": this_data[key]}
163
- last_value = data_dict.get(f"{last_year}Q{last_season}", {}).get(key, {}).get('value')
173
+ last_value = data_dict.get(
174
+ f"{last_year}Q{last_season}",{}
175
+ ).get(key, {}).get('value')
176
+
164
177
  last_value = self.process_value(last_value)
165
- growth = self.calculate_growth(this_value, last_value, 1) if last_value is not None else None
178
+ growth = self.calculate_growth(
179
+ this_value, last_value, 1
180
+ ) if last_value is not None else None
166
181
  temp_dict['growth'] = (f"{growth:.2f}%" if growth else None)
167
-
182
+
168
183
  this_data[key] = temp_dict
169
-
184
+
170
185
  return_dict[time_index] = this_data
171
-
186
+
172
187
  return return_dict
173
188
 
174
189
  def get_dict_of_df(self, data_dict):
175
190
  """
176
191
  dict[dict] -> dict[df]
177
192
  """
178
- for key in data_dict.keys():
179
- data_dict[key] = pd.DataFrame.from_dict(data_dict[key])
180
- return data_dict
193
+ return {
194
+ key: pd.DataFrame.from_dict(data) for key, data in data_dict.items()
195
+ }
181
196
 
182
197
  def set_time_shift(self, date: Union[str, datetime], period: str):
183
198
  if isinstance(date, str):
@@ -197,4 +212,4 @@ class BaseTEJFetcher(abc.ABC):
197
212
  if period == "all":
198
213
  return datetime.strptime("1991-01-01", "%Y-%m-%d")
199
214
 
200
- return date - period_mapping.get(period, timedelta(days=0)) # 預設為不變"
215
+ return date - period_mapping.get(period, timedelta(days=0)) # 預設為不變"
@@ -0,0 +1,200 @@
1
+ from .base import StatsFetcher, StatsDateTime
2
+ import json
3
+ import numpy as np
4
+ import pandas as pd
5
+ from ..utils import StatsDateTime, StatsProcessor
6
+ import importlib.resources as pkg_resources
7
+ import yaml
8
+
9
class CashFlowFetcher(StatsFetcher):
    """
    Cash-flow statement fetcher.

    The base class selects the database/collection for the ticker; this
    class extends the aggregation pipeline with the seasonal cash-flow
    projection and turns the fetched documents into pandas tables.
    """

    def __init__(self, ticker, db_client):
        super().__init__(ticker, db_client)

        self.cash_flow_dict = StatsProcessor.load_yaml(
            "twse/cash_flow_percentage.yaml"
        )  # used for computing the sub-tables

        # Post-processing is dispatched on the collection chosen by the
        # base class.
        self.process_function_map = {
            "twse_stats": self.process_data_twse,
            "us_stats": self.process_data_us
        }

    def prepare_query(self):
        """Return the base pipeline extended with the cash-flow stages."""
        pipeline = super().prepare_query()

        # Field name of the statement inside seasonal_data, per collection.
        name_map = {
            "twse_stats": "cash_flow",
            "us_stats": "cash_flow"
        }

        chart_name = name_map.get(self.collection_name, "cash_flow")

        append_pipeline = [
            {
                "$unwind": "$seasonal_data"  # expand the seasonal_data array
            },
            {
                "$project": {
                    "_id": 0,
                    "ticker": 1,
                    "company_name": 1,
                    "year": "$seasonal_data.year",
                    "season": "$seasonal_data.season",
                    "cash_flow": {
                        "$ifNull": [f"$seasonal_data.{chart_name}", []]
                    }  # avoid null
                }
            },
            {
                # Newest period first.
                "$sort": {
                    "year": -1,
                    "season": -1
                }
            }
        ]

        pipeline = pipeline + append_pipeline

        return pipeline

    def collect_data(self):
        """Run the pipeline; see the base class for the return contract."""
        return super().collect_data()

    def query_data(self):
        """Fetch the documents and process them for the ticker's market."""
        fetched_data = self.collect_data()

        # Unknown collections fall back to the US processing.
        process_fn = self.process_function_map.get(
            self.collection_name, self.process_data_us
        )
        return process_fn(fetched_data)

    def process_data_twse(self, fetched_data):
        """
        Build every table of the cash-flow page for a ``twse_stats`` ticker.

        The statement itself carries no percentages, but iFa shows them,
        and each item belongs to one of three sections (operating,
        investing, financing). The items are therefore walked in order and
        assigned to sections by checkpoint rows instead of slicing.

        Returns:
            dict with ``ticker``, ``company_name``, ``cash_flow`` (whole
            statement, expanded), ``CASHO`` / ``CASHI`` / ``CASHF``
            (operating / investing / financing sections, expanded),
            ``cash_flow_all`` (flattened, every period) and
            ``cash_flow_YoY`` (flattened, only the columns sharing the
            season of the first fetched document).
        """
        index_names = []

        table_dict = dict()
        CASHO_dict = dict()
        CASHI_dict = dict()
        CASHF_dict = dict()

        # Rows that open a section, the row each section's percentages are
        # measured against, and the dict that collects the section's items.
        checkpoints = ["營業活動之現金流量-間接法", "投資活動之現金流量", "籌資活動之現金流量", "匯率變動對現金及約當現金之影響"]
        main_cash_flows = [
            "營業活動之淨現金流入(流出)", "投資活動之淨現金流入(流出)", "籌資活動之淨現金流入(流出)", None
        ]  # denominators of the percentages
        partial_cash_flows = [CASHO_dict, CASHI_dict, CASHF_dict, dict()]

        # Approach: the statement contains the checkpoint rows as well;
        # when the next one shows up, switch to the next section.

        for data in fetched_data:
            year, season, cash_flow = data['year'], data['season'], data['cash_flow']

            time_index = f"{year}Q{season}"

            main_cash_flow_name = None
            partial_cash_flow = None
            next_checkpoint = 0

            temp_dict = {}

            for index_name, cash_flow_value in cash_flow.items():
                # NOTE(review): the bound is 3 although four checkpoints
                # are defined, so the fourth one never ends the financing
                # section - confirm whether that is intended.
                if (next_checkpoint < 3
                        and index_name == checkpoints[next_checkpoint]):
                    # Reached the start of the next section.
                    main_cash_flow_name = main_cash_flows[next_checkpoint]
                    partial_cash_flow = partial_cash_flows[next_checkpoint]
                    partial_cash_flow[time_index] = {}
                    next_checkpoint += 1

                if isinstance(cash_flow_value, dict):
                    value = cash_flow_value.get('value', None)
                else:
                    value = cash_flow_value

                main_value = cash_flow.get(main_cash_flow_name, None)
                if isinstance(main_value, dict):
                    main_value = main_value.get('value', None)

                try:
                    ratio = np.round(
                        (value / main_value) * 100, 2
                    )
                    ratio = f"{ratio}%"
                except (TypeError, ValueError, ArithmeticError):
                    # Missing / non-numeric operand or a zero denominator.
                    ratio = None

                value = StatsProcessor.cal_non_percentage(value, postfix="千元")
                temp_dict[index_name] = {
                    "value": value,
                    "percentage": ratio
                }

                # Items listed before the first checkpoint belong to no
                # section; they only go into the full table.
                if partial_cash_flow is not None:
                    partial_cash_flow[time_index][index_name] = temp_dict[index_name]

            table_dict[time_index] = temp_dict
            index_names += list(cash_flow.keys())

        # De-duplicate while keeping first-seen order.
        index_names = list(dict.fromkeys(index_names))

        cash_flow_table = pd.DataFrame(table_dict)
        cash_flow_table_stats = StatsProcessor.expand_value_percentage(cash_flow_table)

        CASHO_table = pd.DataFrame(CASHO_dict)
        CASHO_table = StatsProcessor.expand_value_percentage(CASHO_table)

        CASHI_table = pd.DataFrame(CASHI_dict)
        CASHI_table = StatsProcessor.expand_value_percentage(CASHI_table)

        CASHF_table = pd.DataFrame(CASHF_dict)
        CASHF_table = StatsProcessor.expand_value_percentage(CASHF_table)

        flattened = {
            time_index: self.flatten_dict(
                period_table, index_names, target_keys=['value', 'percentage']
            )
            for time_index, period_table in table_dict.items()
        }
        cash_flow_flatten = pd.DataFrame.from_dict(flattened)

        # The pipeline sorts newest first, so element 0 is the latest period.
        target_season = fetched_data[0]['season']
        target_season_column = cash_flow_flatten.columns.str.endswith(f"Q{target_season}")

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name'],
            "cash_flow": cash_flow_table_stats,
            "CASHO": CASHO_table,
            "CASHI": CASHI_table,
            "CASHF": CASHF_table,
            "cash_flow_all": cash_flow_flatten,
            "cash_flow_YoY": cash_flow_flatten.loc[:, target_season_column]
        }
        return return_dict

    def process_data_us(self, fetched_data):
        """
        Build the cash-flow tables for a ``us_stats`` ticker.

        Returns:
            dict with ``ticker``, ``company_name``, ``cash_flow`` (one
            column per period) and ``cash_flow_YoY`` (only the columns
            sharing the season of the first fetched document).
        """
        table_dict = {
            f"{data['year']}Q{data['season']}": data['cash_flow']
            for data in fetched_data
        }

        cash_flow_df = pd.DataFrame.from_dict(table_dict)

        # The pipeline sorts newest first, so element 0 is the latest period.
        latest_season = fetched_data[0]['season']
        target_season_columns = cash_flow_df.columns.str.endswith(
            f"Q{latest_season}"
        )
        cash_flow_df_YoY = cash_flow_df.loc[:, target_season_columns]

        return_dict = {
            "ticker": self.ticker,
            "company_name": fetched_data[-1]['company_name'],
            "cash_flow": cash_flow_df,
            "cash_flow_YoY": cash_flow_df_YoY
        }
        return return_dict