neurostats-API 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1,13 @@
1
- __version__='0.0.14'
1
+ __version__='0.0.16'
2
+
3
+ from .fetchers import (
4
+ BalanceSheetFetcher,
5
+ CashFlowFetcher,
6
+ FinanceOverviewFetcher,
7
+ FinanceReportFetcher,
8
+ InstitutionFetcher,
9
+ MarginTradingFetcher,
10
+ MonthRevenueFetcher,
11
+ TechFetcher,
12
+ ProfitLoseFetcher
13
+ )
@@ -2,6 +2,8 @@ from .base import StatsDateTime, StatsFetcher
2
2
  from .balance_sheet import BalanceSheetFetcher
3
3
  from .cash_flow import CashFlowFetcher
4
4
  from .finance_overview import FinanceOverviewFetcher
5
+ from .tej_finance_report import FinanceReportFetcher
6
+ from .tech import TechFetcher
5
7
  from .institution import InstitutionFetcher
6
8
  from .margin_trading import MarginTradingFetcher
7
9
  from .month_revenue import MonthRevenueFetcher
@@ -1,16 +1,18 @@
1
+ import abc
1
2
  from pymongo import MongoClient
2
3
  import pandas as pd
3
4
  import json
4
5
  import pytz
5
6
  from datetime import datetime, timedelta, date
6
- from ..utils import StatsDateTime, StatsProcessor
7
+ from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
7
8
  import yaml
8
9
 
10
+
9
11
  class StatsFetcher:
12
+
10
13
  def __init__(self, ticker, db_client):
11
14
  self.ticker = ticker
12
- self.db = db_client[
13
- "company"] # Replace with your database name
15
+ self.db = db_client["company"] # Replace with your database name
14
16
  self.collection = self.db["twse_stats"]
15
17
 
16
18
  self.timezone = pytz.timezone("Asia/Taipei")
@@ -26,7 +28,6 @@ class StatsFetcher:
26
28
  'grand_total_growth': [f"YoY_{i}" for i in [1, 3, 5, 10]]
27
29
  }
28
30
 
29
-
30
31
  def prepare_query(self):
31
32
  return [
32
33
  {
@@ -37,7 +38,7 @@ class StatsFetcher:
37
38
  ]
38
39
 
39
40
  def collect_data(self, start_date, end_date):
40
- pipeline = self.prepare_query(start_date, end_date)
41
+ pipeline = self.prepare_query()
41
42
 
42
43
  fetched_data = list(self.collection.aggregate(pipeline))
43
44
 
@@ -52,3 +53,102 @@ class StatsFetcher:
52
53
  season = (month - 1) // 3 + 1
53
54
 
54
55
  return StatsDateTime(date, year, month, day, season)
56
+
57
+
58
class BaseTEJFetcher(abc.ABC):
    """Common base for fetchers that read TEJ data through a MongoDB collection.

    ``__init__`` only initialises ``client``, ``db`` and ``collection`` to
    ``None``; subclasses are expected to assign them and to implement
    :meth:`get`.
    """

    def __init__(self):
        self.client = None
        self.db = None
        self.collection = None

    @abc.abstractmethod
    def get(self):
        """Fetch data; the concrete signature is defined by each subclass."""
        pass

    def get_latest_data_time(self, ticker):
        """Return ``last_update.latest_data_date`` of the ticker's document.

        Args:
            ticker: Value matched against the ``ticker`` field.

        Returns:
            The stored ``latest_data_date`` value, or ``None`` when no
            document matches or the field is missing.
        """
        latest_data = self.collection.find_one({"ticker": ticker}, {"last_update": 1, "_id": 0})

        try:
            return latest_data['last_update']["latest_data_date"]
        except (KeyError, TypeError, IndexError):
            # No document (None is not subscriptable) or missing/odd field.
            return None

    def cal_YoY(self, data_dict: dict, start_year: int, end_year: int, season: int):
        """Attach 1/3/5/10-year growth rates to every numeric item of a season.

        Args:
            data_dict: Mapping ``"<year>Q<season>"`` -> ``{item: value}``.
            start_year: First year (inclusive) to report.
            end_year: Last year (inclusive) to report.
            season: Season number used to build the period keys.

        Returns:
            A new mapping ``"<year>Q<season>"`` ->
            ``{item: {"value": v, "YoY_1": ..., "YoY_3": ..., "YoY_5": ...,
            "YoY_10": ...}}``. Non-numeric items are dropped; an item keyed
            exactly ``"season"`` is passed through unchanged. A growth entry
            is ``None`` when the past period or item is unavailable.
            Periods absent from ``data_dict`` are skipped.

        ``data_dict`` is not modified: past values must be read from the raw
        input, otherwise a year processed earlier would already be wrapped in
        a ``{"value": ...}`` dict and every later comparison against it
        would fail.
        """
        year_shifts = (1, 3, 5, 10)
        return_dict = {}
        for year in range(start_year, end_year + 1):
            period = f"{year}Q{season}"
            year_data = data_dict.get(period)
            if year_data is None:
                continue

            processed = {}
            for key, this_value in year_data.items():
                if key == 'season':
                    processed[key] = this_value
                    continue

                if not isinstance(this_value, (int, float)):
                    continue

                temp_dict = {"value": this_value}
                for shift in year_shifts:
                    try:
                        last_value = data_dict[f"{year - shift}Q{season}"][key]
                    except (KeyError, TypeError):
                        temp_dict[f"YoY_{shift}"] = None
                        continue
                    temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(this_value, last_value, delta=shift)

                processed[key] = temp_dict

            return_dict[period] = processed

        return return_dict

    def cal_QoQ(self, data_dict):
        """Attach quarter-over-quarter growth to every numeric item.

        Args:
            data_dict: Mapping ``"<year>Q<season>"`` -> ``{item: value}``.

        Returns:
            A new mapping with the same period keys (in the same order) whose
            items are ``{"value": v, "growth": g}``. ``growth`` is ``None``
            when the previous quarter or item is unavailable. Non-numeric
            items are dropped; an item keyed exactly ``"season"`` is passed
            through unchanged.

        Previous-quarter values are read from the untouched input, so the
        result does not depend on the iteration order of ``data_dict``.
        """
        return_dict = {}
        for time_index, this_data in data_dict.items():
            year, season = time_index.split("Q")
            year = int(year)
            season = int(season)
            if season == 1:
                last_index = f"{year - 1}Q4"
            else:
                last_index = f"{year}Q{season - 1}"

            processed = {}
            for key, this_value in this_data.items():
                if key == 'season':
                    processed[key] = this_value
                    continue

                if not isinstance(this_value, (int, float)):
                    continue

                temp_dict = {"value": this_value}
                try:
                    last_value = data_dict[last_index][key]
                except (KeyError, TypeError):
                    temp_dict['growth'] = None
                else:
                    temp_dict['growth'] = YoY_Calculator.cal_growth(this_value, last_value, delta=1)

                processed[key] = temp_dict

            return_dict[time_index] = processed
        return return_dict

    def get_dict_of_df(self, data_dict):
        """Convert every value of ``data_dict`` to a DataFrame, in place.

        dict[dict] -> dict[DataFrame]; the same (mutated) dict is returned.
        """
        for key in list(data_dict):
            data_dict[key] = pd.DataFrame.from_dict(data_dict[key])
        return data_dict
@@ -1,9 +1,16 @@
1
1
  from .base import StatsFetcher
2
2
  import pandas as pd
3
+ import yfinance as yf
3
4
 
4
5
  class TechFetcher(StatsFetcher):
5
6
 
6
7
  def __init__(self, ticker:str, db_client):
8
+
9
+ """
10
+ The Capitalization-Weighted Index includes the following tickers:
11
+ ['GSPC', 'IXIC', 'DJI', 'TWII']
12
+ """
13
+
7
14
  super().__init__(ticker, db_client)
8
15
  self.full_ohlcv = self._get_ohlcv()
9
16
  self.basic_indexes = ['SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20',
@@ -40,16 +47,36 @@ class TechFetcher(StatsFetcher):
40
47
  )
41
48
 
42
49
  def _get_ohlcv(self):
43
- query = {'ticker': self.ticker}
44
- ticker_full = list(self.collection.find(query))
50
+
51
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
52
+
53
+ full_tick = f'^{self.ticker}'
54
+ yf_ticker = yf.Ticker(full_tick)
55
+ origin_df = yf_ticker.history(period="10y")
56
+ origin_df = origin_df.reset_index()
57
+ origin_df["Date"] = pd.to_datetime(origin_df["Date"]).dt.date
58
+ df = origin_df.rename(
59
+ columns={
60
+ "Date": "date",
61
+ "Open": "open",
62
+ "High": "high",
63
+ "Low": "low",
64
+ "Close": "close",
65
+ "Volume": "volume"
66
+ }
67
+ )
68
+ else:
69
+
70
+ query = {'ticker': self.ticker}
71
+ ticker_full = list(self.collection.find(query))
45
72
 
46
- if not ticker_full:
47
- raise ValueError(f"No data found for ticker: {self.ticker}")
73
+ if not ticker_full:
74
+ raise ValueError(f"No data found for ticker: {self.ticker}")
48
75
 
49
- if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
50
- raise KeyError("Missing 'daily_data' in the retrieved data")
76
+ if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
77
+ raise KeyError("Missing 'daily_data' in the retrieved data")
51
78
 
52
- df = pd.DataFrame(ticker_full[0]['daily_data'])
79
+ df = pd.DataFrame(ticker_full[0]['daily_data'])
53
80
 
54
81
  selected_cols = ['date','open','high','low','close','volume']
55
82
 
@@ -0,0 +1,339 @@
1
+ from .base import BaseTEJFetcher
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ import pandas as pd
5
+ from pymongo import MongoClient
6
+ from ..utils import StatsProcessor, YoY_Calculator
7
+ import warnings
8
+
9
+
10
class FinanceReportFetcher(BaseTEJFetcher):
    """Read TEJ financial-report figures for one ticker from MongoDB.

    The ``*_NOCAL`` fetch modes return a DataFrame of raw values; the
    ``YOY`` / ``QOQ`` modes return a dict of DataFrames that also carry
    growth rates.
    """

    class FetchMode(Enum):
        YOY = 1
        QOQ = 2
        YOY_NOCAL = 3
        QOQ_NOCAL = 4

    def __init__(self, mongo_uri, db_name="company", collection_name="TWN/AINVFQ1"):
        """Connect to ``mongo_uri`` and select ``db_name`` / ``collection_name``."""
        self.client = MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]

        # Column names accepted in ``indexes``; anything else triggers a warning.
        # yapf: disabled
        self.check_index = {
            'coid', 'mdate', 'key3', 'no', 'sem', 'merg', 'curr', 'annd', 'fin_ind', 'bp11', 'bp21', 'bp22', 'bp31',
            'bp41', 'bp51', 'bp53', 'bp61', 'bp62', 'bp63', 'bp64', 'bp65', 'bf11', 'bf12', 'bf21', 'bf22', 'bf41',
            'bf42', 'bf43', 'bf44', 'bf45', 'bf99', 'bsca', 'bsnca', 'bsta', 'bscl', 'bsncl', 'bstl', 'bsse', 'bslse',
            'debt', 'quick', 'ppe', 'ar', 'ip12', 'ip22', 'ip31', 'ip51', 'iv41', 'if11', 'isibt', 'isni', 'isnip',
            'eps', 'ispsd', 'gm', 'opi', 'nri', 'ri', 'nopi', 'ebit', 'cip31', 'cscfo', 'cscfi', 'cscff', 'person',
            'shares', 'wavg', 'taxrate', 'r104', 'r115', 'r105', 'r106', 'r107', 'r108', 'r201', 'r112', 'r401', 'r402',
            'r403', 'r404', 'r405', 'r408', 'r409', 'r410', 'r502', 'r501', 'r205', 'r505', 'r517', 'r512', 'r509',
            'r608', 'r616', 'r610', 'r607', 'r613', 'r612', 'r609', 'r614', 'r611', 'r307', 'r304', 'r305', 'r306',
            'r316', 'r834'
        }  # yapf: enabled

    @staticmethod
    def _to_datetime(value):
        """Return ``value`` as a datetime, parsing ``YYYY-MM-DD`` strings."""
        if isinstance(value, datetime):
            return value
        return datetime.strptime(value, "%Y-%m-%d")

    @staticmethod
    def _season_of(moment):
        """Return the calendar quarter (1-4) that ``moment.month`` falls in."""
        return (moment.month - 1) // 3 + 1

    @staticmethod
    def _build_pipeline(ticker, period_match, report_type, indexes):
        """Assemble the aggregation pipeline shared by the QoQ and YoY queries."""
        if indexes:
            # Only the requested columns of the chosen report type.
            project_stage = {"data.year": 1, "data.season": 1}
            for index in indexes:
                project_stage[f"data.{report_type}.{index}"] = 1
        else:
            # Nothing requested -> the whole report of the chosen type.
            project_stage = {"data.year": 1, "data.season": 1, f"data.{report_type}": 1, "_id": 0}

        return [
            {"$match": {"ticker": ticker}},
            {"$unwind": "$data"},
            {"$match": period_match},
            {"$project": project_stage},
        ]

    def get(
            self,
            ticker,
            fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
            start_date: str = None,
            end_date: str = None,
            report_type: str = "Q",
            indexes: list = None):
        """Basic query entry point.

        Args:
            ticker (str): Stock ticker.
            fetch_mode (FetchMode):
                YOY / YOY_NOCAL: one season of every year in the range.
                QOQ / QOQ_NOCAL: every season in the range; QOQ adds the
                growth versus the previous season.
            start_date (str): Start of the range, ``YYYY-MM-DD``. Defaults to
                ``2005-01-01`` (with a warning).
            end_date (str): End of the range, ``YYYY-MM-DD``. In the QoQ
                modes it defaults to today (with a warning).
            report_type (str): Report kind, one of {"A", "Q", "TTM"}.
            indexes (list): Columns to return; empty/None returns all.

        Returns:
            ``pd.DataFrame`` for the ``*_NOCAL`` modes, otherwise a dict
            mapping each index to a ``pd.DataFrame``.

        Raises:
            ValueError: If the range is inverted or ``fetch_mode`` is not a
                ``FetchMode`` member.
        """
        # De-duplicate while keeping the caller's order.
        indexes = list(dict.fromkeys(indexes)) if indexes else []

        # Warn (do not fail) about names that are not known columns.
        if indexes and self.check_index:
            difference = set(indexes) - self.check_index
            if difference:
                warnings.warn(f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確", UserWarning)

        if fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}:
            if not start_date:
                warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
                start_date = "2005-01-01"
            if not end_date:
                warnings.warn("No end_date specified, use default date = today", UserWarning)
                end_date = datetime.today()

            # Parse first, then compare: the defaults may already be datetimes.
            start_date = self._to_datetime(start_date)
            end_date = self._to_datetime(end_date)
            if start_date > end_date:
                raise ValueError("start_date must not be later than end_date")

            return self.get_QoQ_data(
                ticker=ticker,
                start_year=start_date.year,
                start_season=self._season_of(start_date),
                end_year=end_date.year,
                end_season=self._season_of(end_date),
                report_type=report_type,
                indexes=indexes,
                use_cal=(fetch_mode == self.FetchMode.QOQ))

        if fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}:
            if not start_date:
                warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
                start_date = "2005-01-01"
            start_year = self._to_datetime(start_date).year

            # NOTE(review): as before, the caller's end_date is not consulted
            # in the YoY modes; the newest stored data date (or today) decides
            # the end year and the season. Confirm this is intended.
            latest = self.get_latest_data_time(ticker)
            if not latest:
                latest = datetime.today()

            return self.get_YoY_data(
                ticker=ticker,
                start_year=start_year,
                end_year=latest.year,
                season=self._season_of(latest),
                report_type=report_type,
                indexes=indexes,
                use_cal=(fetch_mode == self.FetchMode.YOY))

        raise ValueError(f"Unsupported fetch_mode: {fetch_mode!r}")

    def get_QoQ_data(
            self, ticker, start_year, start_season, end_year, end_season, report_type="Q", indexes=None,
            use_cal=False):
        """Return the data of every season inside the given range.

        With ``use_cal`` the season just before the range is fetched as well,
        used as the growth baseline, and removed from the result.

        Returns:
            ``pd.DataFrame`` (periods as columns, newest fetched first) when
            ``use_cal`` is false, otherwise a dict of ``pd.DataFrame``.
        """
        if start_year == end_year:
            # Both season bounds apply to the same year, so they must be
            # combined; as separate OR clauses they would match every season.
            period_clauses = [{
                "data.year": start_year,
                "data.season": {"$gte": start_season, "$lte": end_season}
            }]
        else:
            period_clauses = [
                {"data.year": {"$gt": start_year, "$lt": end_year}},
                {"data.year": start_year, "data.season": {"$gte": start_season}},
                {"data.year": end_year, "data.season": {"$lte": end_season}},
            ]

        lower_bound_key = None
        if use_cal:
            if start_season == 1:
                lower_bound_year, lower_bound_season = start_year - 1, 4
            else:
                lower_bound_year, lower_bound_season = start_year, start_season - 1
            period_clauses.append({"data.year": lower_bound_year, "data.season": lower_bound_season})
            lower_bound_key = f"{lower_bound_year}Q{lower_bound_season}"

        pipeline = self._build_pipeline(ticker, {"$or": period_clauses}, report_type, indexes)

        # list() works on every PyMongo cursor version.
        fetched_data = list(self.collection.aggregate(pipeline))

        data_dict = StatsProcessor.list_of_dict_to_dict(
            fetched_data, keys=["year", "season"], delimeter="Q", data_key=report_type)

        if use_cal:
            data_with_QoQ = self.cal_QoQ(data_dict)
            # Drop the baseline season by name; it may be absent from the data.
            data_with_QoQ.pop(lower_bound_key, None)
            data_df = pd.DataFrame.from_dict(data_with_QoQ)
            data_df = data_df.iloc[:, ::-1].T
            return self.get_dict_of_df(data_df.to_dict())

        data_df = pd.DataFrame.from_dict(data_dict)
        return data_df.iloc[:, ::-1]

    def get_YoY_data(self, ticker, start_year, end_year, season, report_type="Q", indexes=None, use_cal=False):
        """Return one season's data for every year in ``start_year..end_year``.

        With ``use_cal`` the years 1, 3, 5 and 10 before each requested year
        are fetched too, so the growth rates can be computed.

        Returns:
            ``pd.DataFrame`` when ``use_cal`` is false, otherwise a dict of
            ``pd.DataFrame``.
        """
        if use_cal:
            select_year = set()
            for year in range(start_year, end_year + 1):
                select_year |= {year, year - 1, year - 3, year - 5, year - 10}
            select_year = sorted(select_year, reverse=True)
        else:
            select_year = list(range(start_year, end_year + 1))

        period_match = {
            "$and": [{
                "data.year": {"$in": select_year}
            }, {
                "data.season": {"$eq": season}
            }]
        }
        pipeline = self._build_pipeline(ticker, period_match, report_type, indexes)

        fetched_data = list(self.collection.aggregate(pipeline))

        data_dict = StatsProcessor.list_of_dict_to_dict(
            fetched_data, keys=['year', 'season'], data_key=report_type, delimeter='Q')

        if use_cal:
            data_with_YoY = self.cal_YoY(data_dict, start_year, end_year, season)
            data_df = pd.DataFrame.from_dict(data_with_YoY)
            data_df = data_df.iloc[:, ::-1].T
            return self.get_dict_of_df(data_df.to_dict())

        data_df = pd.DataFrame.from_dict(data_dict)
        return data_df.iloc[:, ::-1]
@@ -12,51 +12,68 @@ class ValueFetcher(StatsFetcher):
12
12
  def prepare_query(self, start_date, end_date):
13
13
  pipeline = super().prepare_query()
14
14
 
15
- pipeline.append({
16
- "$project": {
17
- "_id": 0,
18
- "ticker": 1,
19
- "company_name": 1,
20
- "daily_data": {
21
- "$map": {
22
- "input": {
23
- "$filter": {
24
- "input": "$daily_data",
25
- "as": "daily",
26
- "cond": {
27
- "$and": [{
28
- "$gte": ["$$daily.date", start_date]
29
- }, {
30
- "$lte": ["$$daily.date", end_date]
31
- }]
32
- }
33
- }
34
- },
35
- "as": "daily_item",
36
- "in": {
37
- "date": "$$daily_item.date",
38
- "close": "$$daily_item.close",
39
- "P_B": "$$daily_item.P_B",
40
- "P_E": "$$daily_item.P_E",
41
- "P_FCF": "$$daily_item.P_FCF",
42
- "P_S": "$$daily_item.P_S",
43
- "EV_OPI": "$$daily_item.EV_OPI",
44
- "EV_EBIT": "$$daily_item.EV_EBIT",
45
- "EV_EBITDA": "$$daily_item.EV_EBITDA",
46
- "EV_S": "$$daily_item.EV_S"
47
- }
15
+ pipeline.append(
16
+ {
17
+ "$project":
18
+ {
19
+ "_id": 0,
20
+ "ticker": 1,
21
+ "company_name": 1,
22
+ "daily_data":
23
+ {
24
+ "$map":
25
+ {
26
+ "input":
27
+ {
28
+ "$filter":
29
+ {
30
+ "input": "$daily_data",
31
+ "as": "daily",
32
+ "cond":
33
+ {
34
+ "$and":
35
+ [
36
+ {
37
+ "$gte": ["$$daily.date", start_date]
38
+ }, {
39
+ "$lte": ["$$daily.date", end_date]
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ },
45
+ "as": "daily_item",
46
+ "in":
47
+ {
48
+ "date": "$$daily_item.date",
49
+ "close": "$$daily_item.close",
50
+ "P_B": "$$daily_item.P_B",
51
+ "P_E": "$$daily_item.P_E",
52
+ "P_FCF": "$$daily_item.P_FCF",
53
+ "P_S": "$$daily_item.P_S",
54
+ "EV_OPI": "$$daily_item.EV_OPI",
55
+ "EV_EBIT": "$$daily_item.EV_EBIT",
56
+ "EV_EBITDA": "$$daily_item.EV_EBITDA",
57
+ "EV_S": "$$daily_item.EV_S"
58
+ }
59
+ }
60
+ },
61
+ "yearly_data": 1
48
62
  }
49
- },
50
- "yearly_data": 1
51
- }
52
- })
63
+ })
53
64
 
54
65
  return pipeline
55
66
 
67
def collect_data(self, start_date, end_date):
    """Run the date-bounded aggregation and return its first document."""
    stages = self.prepare_query(start_date, end_date)
    documents = list(self.collection.aggregate(stages))
    return documents[0]
73
+
56
74
  def query_data(self):
57
75
  try:
58
- latest_time = StatsDateTime.get_latest_time(
59
- self.ticker, self.collection)['last_update_time']
76
+ latest_time = StatsDateTime.get_latest_time(self.ticker, self.collection)['last_update_time']
60
77
  target_year = latest_time['daily_data']['last_update'].year
61
78
  start_date = latest_time['daily_data']['last_update'] - timedelta(days=31)
62
79
  end_date = latest_time['daily_data']['last_update']
@@ -79,7 +96,7 @@ class ValueFetcher(StatsFetcher):
79
96
  )
80
97
 
81
98
  return fetched_data
82
-
99
+
83
100
  def query_value_serie(self):
84
101
  """
85
102
  回傳指定公司的歷來評價
@@ -104,28 +121,32 @@ class ValueFetcher(StatsFetcher):
104
121
  }
105
122
  },
106
123
  {
107
- "$project": {
108
- "_id": 0,
109
- "ticker": 1,
110
- "company_name": 1,
111
- "daily_data": {
112
- "$map": {
113
- "input": "$daily_data", # 正確地指定要處理的陣列
114
- "as": "daily", # 每個元素的名稱
115
- "in": {
116
- "date": "$$daily.date",
117
- "P_E": "$$daily.P_E",
118
- "P_FCF": "$$daily.P_FCF",
119
- "P_B": "$$daily.P_B",
120
- "P_S": "$$daily.P_S",
121
- "EV_OPI": "$$daily.EV_OPI",
122
- "EV_EBIT": "$$daily.EV_EBIT",
123
- "EV_EBITDA": "$$daily.EV_EBITDA",
124
- "EV_S": "$$daily.EV_S"
124
+ "$project":
125
+ {
126
+ "_id": 0,
127
+ "ticker": 1,
128
+ "company_name": 1,
129
+ "daily_data":
130
+ {
131
+ "$map":
132
+ {
133
+ "input": "$daily_data", # 正確地指定要處理的陣列
134
+ "as": "daily", # 每個元素的名稱
135
+ "in":
136
+ {
137
+ "date": "$$daily.date",
138
+ "P_E": "$$daily.P_E",
139
+ "P_FCF": "$$daily.P_FCF",
140
+ "P_B": "$$daily.P_B",
141
+ "P_S": "$$daily.P_S",
142
+ "EV_OPI": "$$daily.EV_OPI",
143
+ "EV_EBIT": "$$daily.EV_EBIT",
144
+ "EV_EBITDA": "$$daily.EV_EBITDA",
145
+ "EV_S": "$$daily.EV_S"
146
+ }
147
+ }
125
148
  }
126
- }
127
149
  }
128
- }
129
150
  }
130
151
  ]
131
152
 
@@ -133,21 +154,17 @@ class ValueFetcher(StatsFetcher):
133
154
  fetched_data = fetched_data[0]
134
155
 
135
156
  value_keys = ["P_E", "P_FCF", "P_B", "P_S", "EV_OPI", "EV_EBIT", "EV_EVITDA", "EV_S"]
136
- return_dict = {
137
- value_key: dict() for value_key in value_keys
138
- }
157
+ return_dict = {value_key: dict() for value_key in value_keys}
139
158
 
140
159
  for value_key in value_keys:
141
160
  for data in fetched_data['daily_data']:
142
161
  if (value_key not in data.keys()):
143
162
  continue
144
163
  else:
145
- return_dict[value_key].update({
146
- data['date']: data[value_key]
147
- })
164
+ return_dict[value_key].update({data['date']: data[value_key]})
148
165
 
149
166
  return_dict = {
150
- value_key: pd.DataFrame.from_dict(value_dict, orient = 'index', columns = [value_key])
167
+ value_key: pd.DataFrame.from_dict(value_dict, orient='index', columns=[value_key])
151
168
  for value_key, value_dict in return_dict.items()
152
169
  }
153
170
  return return_dict
@@ -1,4 +1,5 @@
1
1
  from .datetime import StatsDateTime
2
2
  from .db_client import DBClient
3
3
  from .data_process import StatsProcessor
4
- from .fetcher import StatsFetcher
4
+ from .fetcher import StatsFetcher
5
+ from .calculate_value import YoY_Calculator
@@ -0,0 +1,26 @@
1
class YoY_Calculator:
    """Growth-rate helper used for year-over-year / quarter-over-quarter figures."""

    @classmethod
    def cal_growth(cls, target_value: float, past_value: float, delta: int):
        """Compute the growth rate, annualised when ``delta`` is above 1.

        Args:
            target_value: Value at the current point in time.
            past_value: Value at the earlier point in time.
            delta: Number of years/seasons between the two values. When
                ``delta > 1`` the annualised rate
                ``(target / past) ** (1 / delta) - 1`` is used, otherwise
                the plain rate ``(target - past) / past``.

        Returns:
            The growth rate, or ``None`` when it cannot be computed
            (non-numeric input, division by zero, overflow) or when the
            annualised rate would be a complex number.
        """
        try:
            if delta > 1:
                growth = ((target_value / past_value)**(1 / delta)) - 1
            else:
                growth = (target_value - past_value) / past_value
        except (TypeError, ValueError, ArithmeticError):
            # ArithmeticError covers ZeroDivisionError and OverflowError.
            return None

        # A negative ratio raised to a fractional power yields a complex value.
        if isinstance(growth, complex):
            return None

        return growth
@@ -20,7 +20,6 @@ class StatsProcessor:
20
20
  """
21
21
  1. 讀檔: txt / yaml
22
22
  2. 將巢狀dictionary / DataFrame扁平化
23
-
24
23
  """
25
24
 
26
25
  @classmethod
@@ -215,3 +214,59 @@ class StatsProcessor:
215
214
  return int(np.round(value).item())
216
215
  else:
217
216
  return value
217
+
218
@classmethod
def list_of_dict_to_dict(
    cls,
    data_list: list,
    key: str = "",
    keys: list = None,
    delimeter: str = "_",
    data_key: str = "Q"
):
    """Re-key a list of TEJ records: List[Dict] -> Dict[Dict].

    Input shape::

        [
            {"data": {"year": 2021, "season": 1, "Q": {...}, ...}},
            ...
        ]

    Args:
        data_list: Records, each holding its payload under ``"data"``.
        key: Single field whose value becomes the index of the result.
            Takes precedence over ``keys`` when both are given.
        keys: Several fields whose values are joined into the index.
        delimeter: Separator used when joining several field values.
        data_key: Field of the payload stored as the value.

    Returns:
        ``{"2021": {...}}`` for ``key="year"``, or ``{"2021Q2": {...}}``
        for ``keys=["year", "season"], delimeter="Q"``. Later records
        overwrite earlier ones that produce the same index.

    Raises:
        ValueError: If neither ``key`` nor ``keys`` is given.
        KeyError: If a record lacks ``"data"``, an index field or
            ``data_key``.

    The input records are left untouched.
    """
    index_fields = [key] if key else list(keys or [])
    if not index_fields:
        raise ValueError("func list_of_dict_to_dict must have argument \"key\" or \"keys\"")

    return_dict = {}
    for record in data_list:
        payload = record['data']

        missing = [field for field in index_fields if field not in payload]
        if missing:
            raise KeyError(f"record is missing index field(s): {missing}")

        index = delimeter.join(str(payload[field]) for field in index_fields)
        return_dict[index] = payload[data_key]

    return return_dict
@@ -1,12 +1,18 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neurostats_API
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
7
7
  Author-email: jason@neurowatt.ai
8
8
  Requires-Python: >=3.6
9
9
  Description-Content-Type: text/markdown
10
+ Requires-Dist: numpy>=2.1.0
11
+ Requires-Dist: pandas>=2.2.0
12
+ Requires-Dist: pymongo
13
+ Requires-Dist: pytz
14
+ Requires-Dist: python-dotenv
15
+ Requires-Dist: yfinance
10
16
 
11
17
  # neurostats_API
12
18
 
@@ -83,7 +89,7 @@ pip install neurostats-API
83
89
  ```Python
84
90
  >>> import neurostats_API
85
91
  >>> print(neurostats_API.__version__)
86
- 0.0.14
92
+ 0.0.16
87
93
  ```
88
94
 
89
95
  ### 得到最新一期的評價資料與歷年評價
@@ -667,7 +673,121 @@ fetcher.query()
667
673
 
668
674
  請注意`range`, `last_range`, `52week_range`這三個項目型態為字串,其餘為float
669
675
 
676
+
677
+ ## TEJ 相關
678
+ ### 會計師簽證財務資料
679
+ ```Python
680
+ from neurostats_API import FinanceReportFetcher
681
+
682
+ mongo_uri = <MongoDB 的 URI>
683
+ db_name = 'company' # 連接的DB名稱
684
+ collection_name = "TWN/AINVFQ1" # 連接的collection對象
685
+
686
+ fetcher = FinanceReportFetcher(
687
+ mongo_uri = mongo_uri,
688
+ db_name = db_name,
689
+ collection_name = collection_name
690
+ )
691
+
692
+ data = fetcher.get(
693
+ ticker = "2330", # 任意的股票代碼
694
+ fetch_mode = fetcher.FetchMode.QOQ_NOCAL, # 取得模式
695
+ start_date = "2005-01-01",
696
+ end_date = "2024-12-31",
697
+ report_type = "Q",
698
+ indexes = []
699
+ ) # -> pd.DataFrame or Dict[pd.DataFrame]
700
+ ```
701
+ - `ticker`: 股票代碼
702
+
703
+ - `fetch_mode` : 取得模式,為`fetcher.FetchMode.YOY`、`fetcher.FetchMode.QOQ`、`fetcher.FetchMode.YOY_NOCAL` 或 `fetcher.FetchMode.QOQ_NOCAL`
704
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date同季的歷年資料,時間範圍以start_date為起始
705
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
706
+
707
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
708
+
709
+ - `QOQ`: 時間範圍內每季的每個index的數值以及QoQ
710
+
711
+ - `YOY`: 以end_date為準,取得與end_date同季的歷年資料以及成長率,時間範圍以start_date為起始
712
+
713
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
714
+
715
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
716
+
717
+ - `report_type`: 選擇哪種報告,預設為`Q`
718
+ - `A`: 當年累計
719
+ - `Q`: 當季數值
720
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
721
+
722
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
723
+ - 範例輸入: `['bp41', 'bp51']`
724
+
725
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
726
+ 請看 `會計師簽證財務資料`
727
+
728
+ #### 回傳資料
729
+ ##### `YOY_NOCAL` 與 `QOQ_NOCAL`
730
+ 為回傳`pd.DataFrame`,column名稱為<年份>Q<季>, row名稱為指定財報項目
731
+ ```Python
732
+ # fetch_mode = fetcher.FetchMode.QOQ_NOCAL
733
+ 2024Q3 2024Q2 2024Q1
734
+ bp41 7.082005e+07 6.394707e+07 5.761001e+07
735
+ bp51 3.111298e+09 3.145373e+09 3.091985e+09
736
+
737
+ # fetch_mode = fetcher.FetchMode.YOY_NOCAL
738
+ 2024Q3 2023Q3 2022Q3
739
+ bp41 7.082005e+07 5.377231e+07 6.201822e+07
740
+ bp51 3.111298e+09 3.173919e+09 2.453840e+09
741
+ ```
742
+
743
+ ##### `YOY` 與 `QOQ`
744
+ 回傳為`Dict[pd.DataFrame]`, key 為指定的index, DataFrame中則是該index歷年的數值與成長率
745
+ ```Python
746
+ # fetch_mode = fetcher.FetchMode.QOQ
747
+ {
748
+ 'bp41':
749
+ 2024Q3 2024Q2 2024Q1
750
+ value 7.082005e+07 6.394707e+07 5.761001e+07
751
+ growth 1.074791e-01 1.099994e-01 5.532101e-03,
752
+ 'bp51':
753
+ 2024Q3 2024Q2 2024Q1
754
+ value 3.111298e+09 3.145373e+09 3.091985e+09
755
+ growth -1.083335e-02 1.726663e-02 -4.159542e-03
756
+ }
757
+
758
+ # fetch_mode = fetcher.FetchMode.YOY
759
+ {
760
+ 'bp41':
761
+ 2024Q3 2023Q3 2022Q3
762
+ value 7.082005e+07 5.377231e+07 6.201822e+07
763
+ YoY_1 NaN NaN 4.130744e-01
764
+ YoY_3 1.729171e-01 9.556684e-02 1.883274e-01
765
+ YoY_5 1.389090e-01 1.215242e-01 1.642914e-01
766
+ YoY_10 1.255138e-01 1.356297e-01 1.559702e-01,
767
+ 'bp51':
768
+ 2024Q3 2023Q3 2022Q3
769
+ value 3.111298e+09 3.173919e+09 2.453840e+09
770
+ YoY_1 NaN NaN 3.179539e-01
771
+ YoY_3 1.866752e-01 2.766851e-01 2.638677e-01
772
+ YoY_5 2.068132e-01 2.479698e-01 1.815106e-01
773
+ YoY_10 1.420500e-01 1.586797e-01 1.551364e-01
774
+ }
775
+ ```
776
+
777
+
670
778
  ## 版本紀錄
779
+ ## 0.0.16
780
+ - 處理ValueFetcher的error #issue76
781
+
782
+ - tej_fetcher新增 QOQ, YOY功能
783
+
784
+ ## 0.0.15
785
+ - TechFetcher中新增指數條件
786
+
787
+ - 新增tej_fetcher索取TEJ相關的資料
788
+
789
+ - package新增dependency,可以安裝需要的相關package
790
+
671
791
  ## 0.0.14
672
792
  - 修改部分財報資料錯誤的乘以1000的問題
673
793
 
@@ -1,28 +1,30 @@
1
- neurostats_API/__init__.py,sha256=zDrZHoj7CyPMzwVv9Yxva_-e95Gd1sRJSB99gLu9kXU,20
1
+ neurostats_API/__init__.py,sha256=5ToELVqNOIdVJrMj5G8JvbyRIjvo1FxcP6e-a-iMe1Y,261
2
2
  neurostats_API/cli.py,sha256=UJSWLIw03P24p-gkBb6JSEI5dW5U12UvLf1L8HjQD-o,873
3
3
  neurostats_API/main.py,sha256=QcsfmWivg2Dnqw3MTJWiI0QvEiRs0VuH-BjwQHFCv00,677
4
- neurostats_API/fetchers/__init__.py,sha256=ylYEySHQxcAhUUWEOCGZfmaAg7Mir5MfuEhOjk3POJg,406
4
+ neurostats_API/fetchers/__init__.py,sha256=B4aBwVzf_X-YieEf3fZteU0qmBPVIB9VjrmkyWhLK18,489
5
5
  neurostats_API/fetchers/balance_sheet.py,sha256=sQv4Gk5uoKURLEdh57YknOQWiyVwaXJ2Mw75jxNqUS0,5804
6
- neurostats_API/fetchers/base.py,sha256=NW2SFzrimyAIrdJx1LVmTazelyZOAtcj54kJKHc4Vaw,1662
6
+ neurostats_API/fetchers/base.py,sha256=rcrKW2PTJYfBsxsiGpCYiVTK9pQW4aALYLixIjvNMUk,4890
7
7
  neurostats_API/fetchers/cash_flow.py,sha256=TY7VAWVXkj5-mzH5Iu0sIE-oV8MvGmmDy0URNotNV1E,7614
8
8
  neurostats_API/fetchers/finance_overview.py,sha256=PxUdWY0x030olYMLcCHDBn068JLmCE2RTOce1dxs5vM,27753
9
9
  neurostats_API/fetchers/institution.py,sha256=UrcBc6t7u7CnEwUsf6YmLbbJ8VncdWpq8bCz17q2dgs,11168
10
10
  neurostats_API/fetchers/margin_trading.py,sha256=lQImtNdvaBoSlKhJvQ3DkH3HjSSgKRJz4ZZpyR5-Z4I,10433
11
11
  neurostats_API/fetchers/month_revenue.py,sha256=nixX2llzjCFr2m2YVjxrSfkBusnZPrPb2dRDq1XLGhw,4251
12
12
  neurostats_API/fetchers/profit_lose.py,sha256=EN9Y0iamcAaHMZdjHXO6b_2buLnORssf8ZS7A0hi74s,5896
13
- neurostats_API/fetchers/tech.py,sha256=wH1kkqiETQhF0HAhk-UIiucnZ3EiL85Q-yMWCcVOiFM,11395
14
- neurostats_API/fetchers/value_invest.py,sha256=_eQxuEnIYvksb06QHixGK29Gnwr_3xmI6Tu7dv4J__E,5769
13
+ neurostats_API/fetchers/tech.py,sha256=Hol1bcwJ_ERcnoTXNWlqqaWOuzdl7MeiAjCvzQMZDTg,12269
14
+ neurostats_API/fetchers/tej_finance_report.py,sha256=laXph2ca1LCFocZjjdvtzmm5fcUecHk2Gs5h6-XMSWY,12967
15
+ neurostats_API/fetchers/value_invest.py,sha256=b_x2Dpgs8VBU5HdG8ocKtfIEkqhU-Q0S5n6RxuFuM2g,7467
15
16
  neurostats_API/tools/balance_sheet.yaml,sha256=6XygNG_Ybb1Xkk1e39LMLKr7ATvaCP3xxuwFbgNl6dA,673
16
17
  neurostats_API/tools/cash_flow_percentage.yaml,sha256=fk2Z4eb1JjGFvP134eJatHacB7BgTkBenhDJr83w8RE,1345
17
18
  neurostats_API/tools/finance_overview_dict.yaml,sha256=B9nV75StXkrF3yv2-eezzitlJ38eEK86RD_VY6588gQ,2884
18
19
  neurostats_API/tools/profit_lose.yaml,sha256=iyp9asYJ04vAxk_HBUDse_IBy5oVvYHpwsyACg5YEeg,3029
19
20
  neurostats_API/tools/seasonal_data_field_dict.txt,sha256=X8yc_el6p8BH_3FikTqBVFGsvWdXT6MHXLfKfi44334,8491
20
- neurostats_API/utils/__init__.py,sha256=FTYKRFzW2XVXdnSHXnS3mQQaHlKF9xGqrMsgZZ2kroc,142
21
- neurostats_API/utils/data_process.py,sha256=YKfk3fXkcmwFS_8YxOV2uRLnt9NX3cYPV_XxrCgk8Yo,7597
21
+ neurostats_API/utils/__init__.py,sha256=0tJCRmlJq2aDwcNNW-oEaA9H0OxTJMFvjpVYtG4AvZU,186
22
+ neurostats_API/utils/calculate_value.py,sha256=lUKSsWU76XRmDUcmi4eDjoQxjb3vWpAAKInF9w49VNI,782
23
+ neurostats_API/utils/data_process.py,sha256=A--dzOsu42jRxqqCD41gTtjE5rhEBYmhB6y-AnCvo5U,8986
22
24
  neurostats_API/utils/datetime.py,sha256=XJya4G8b_-ZOaBbMXgQjWh2MC4wc-o6goQ7EQJQMWrQ,773
23
25
  neurostats_API/utils/db_client.py,sha256=OYe6yazcR4Aa6jYmy47JrryUeh2NnKGqY2K_lSZe6i8,455
24
26
  neurostats_API/utils/fetcher.py,sha256=VbrUhjA-GG5AyjPX2SHtFIbZM4dm3jo0RgZzuCbb_Io,40927
25
- neurostats_API-0.0.14.dist-info/METADATA,sha256=MeV1goaFbTQI0ddvr-TvElrggROHEiAixNCqP_gyLkI,25935
26
- neurostats_API-0.0.14.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
27
- neurostats_API-0.0.14.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
28
- neurostats_API-0.0.14.dist-info/RECORD,,
27
+ neurostats_API-0.0.16.dist-info/METADATA,sha256=9US1mdwWnOCAnwfsOj-ZLRCfo07p3yd0UfwMKS6989g,29848
28
+ neurostats_API-0.0.16.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
29
+ neurostats_API-0.0.16.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
30
+ neurostats_API-0.0.16.dist-info/RECORD,,