neurostats-API 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1 +1,13 @@
1
- __version__='0.0.14'
1
+ __version__='0.0.16'
2
+
3
+ from .fetchers import (
4
+ BalanceSheetFetcher,
5
+ CashFlowFetcher,
6
+ FinanceOverviewFetcher,
7
+ FinanceReportFetcher,
8
+ InstitutionFetcher,
9
+ MarginTradingFetcher,
10
+ MonthRevenueFetcher,
11
+ TechFetcher,
12
+ ProfitLoseFetcher
13
+ )
@@ -2,6 +2,8 @@ from .base import StatsDateTime, StatsFetcher
2
2
  from .balance_sheet import BalanceSheetFetcher
3
3
  from .cash_flow import CashFlowFetcher
4
4
  from .finance_overview import FinanceOverviewFetcher
5
+ from .tej_finance_report import FinanceReportFetcher
6
+ from .tech import TechFetcher
5
7
  from .institution import InstitutionFetcher
6
8
  from .margin_trading import MarginTradingFetcher
7
9
  from .month_revenue import MonthRevenueFetcher
@@ -1,16 +1,18 @@
1
+ import abc
1
2
  from pymongo import MongoClient
2
3
  import pandas as pd
3
4
  import json
4
5
  import pytz
5
6
  from datetime import datetime, timedelta, date
6
- from ..utils import StatsDateTime, StatsProcessor
7
+ from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
7
8
  import yaml
8
9
 
10
+
9
11
  class StatsFetcher:
12
+
10
13
  def __init__(self, ticker, db_client):
11
14
  self.ticker = ticker
12
- self.db = db_client[
13
- "company"] # Replace with your database name
15
+ self.db = db_client["company"] # Replace with your database name
14
16
  self.collection = self.db["twse_stats"]
15
17
 
16
18
  self.timezone = pytz.timezone("Asia/Taipei")
@@ -26,7 +28,6 @@ class StatsFetcher:
26
28
  'grand_total_growth': [f"YoY_{i}" for i in [1, 3, 5, 10]]
27
29
  }
28
30
 
29
-
30
31
  def prepare_query(self):
31
32
  return [
32
33
  {
@@ -37,7 +38,7 @@ class StatsFetcher:
37
38
  ]
38
39
 
39
40
  def collect_data(self, start_date, end_date):
40
- pipeline = self.prepare_query(start_date, end_date)
41
+ pipeline = self.prepare_query()
41
42
 
42
43
  fetched_data = list(self.collection.aggregate(pipeline))
43
44
 
@@ -52,3 +53,102 @@ class StatsFetcher:
52
53
  season = (month - 1) // 3 + 1
53
54
 
54
55
  return StatsDateTime(date, year, month, day, season)
56
+
57
+
58
+ class BaseTEJFetcher(abc.ABC):
59
+
60
+ def __init__(self):
61
+ self.client = None
62
+ self.db = None
63
+ self.collection = None
64
+
65
+ @abc.abstractmethod
66
+ def get(self):
67
+ pass
68
+
69
+ def get_latest_data_time(self, ticker):
70
+ latest_data = self.collection.find_one({"ticker": ticker}, {"last_update": 1, "_id": 0})
71
+
72
+ try:
73
+ latest_date = latest_data['last_update']["latest_data_date"]
74
+ except Exception as e:
75
+ latest_date = None
76
+
77
+ return latest_date
78
+
79
+ def cal_YoY(self, data_dict: dict, start_year: int, end_year: int, season: int):
80
+ year_shifts = [1, 3, 5, 10]
81
+ return_dict = {}
82
+ for year in range(start_year, end_year + 1):
83
+ year_data = data_dict[f"{year}Q{season}"]
84
+ year_keys = list(year_data.keys())
85
+ for key in year_keys:
86
+ if (key in 'season'):
87
+ continue
88
+
89
+ if (isinstance(year_data[key], (int, float))):
90
+ temp_dict = {"value": year_data[key]}
91
+
92
+ for shift in year_shifts:
93
+ this_value = year_data[key]
94
+ try:
95
+ past_year = str(year - shift)
96
+ last_value = data_dict[f"{past_year}Q{season}"][key]
97
+ temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(this_value, last_value, delta=shift)
98
+ except Exception as e:
99
+ temp_dict[f"YoY_{shift}"] = None
100
+
101
+ year_data[key] = temp_dict
102
+
103
+ else:
104
+ year_data.pop(key)
105
+
106
+ return_dict[f"{year}Q{season}"] = year_data
107
+
108
+ return return_dict
109
+
110
+ def cal_QoQ(self, data_dict):
111
+ return_dict = {}
112
+ for i, time_index in enumerate(data_dict.keys()):
113
+ year, season = time_index.split("Q")
114
+ year = int(year)
115
+ season = int(season)
116
+ if (season == 1):
117
+ last_year = year - 1
118
+ last_season = 4
119
+ else:
120
+ last_year = year
121
+ last_season = season - 1
122
+
123
+ this_data = data_dict[time_index]
124
+ this_keys = list(this_data.keys())
125
+ for key in this_keys:
126
+ if (key in 'season'):
127
+ continue
128
+
129
+ this_value = this_data[key]
130
+
131
+ if (isinstance(this_value, (int, float))):
132
+ temp_dict = {"value": this_value}
133
+
134
+ try:
135
+ last_value = data_dict[f"{last_year}Q{last_season}"][key]['value']
136
+
137
+ temp_dict['growth'] = YoY_Calculator.cal_growth(this_value, last_value, delta=1)
138
+ except Exception as e:
139
+ temp_dict['growth'] = None
140
+
141
+ this_data[key] = temp_dict
142
+
143
+ else:
144
+ this_data.pop(key)
145
+ return_dict[time_index] = this_data
146
+ return return_dict
147
+
148
+ def get_dict_of_df(self, data_dict):
149
+ """
150
+ dict[dict] -> dict[df]
151
+ """
152
+ for key in data_dict.keys():
153
+ data_dict[key] = pd.DataFrame.from_dict(data_dict[key])
154
+ return data_dict
@@ -1,9 +1,16 @@
1
1
  from .base import StatsFetcher
2
2
  import pandas as pd
3
+ import yfinance as yf
3
4
 
4
5
  class TechFetcher(StatsFetcher):
5
6
 
6
7
  def __init__(self, ticker:str, db_client):
8
+
9
+ """
10
+ The Capitalization-Weighted Index includes the following tickers:
11
+ ['GSPC', 'IXIC', 'DJI', 'TWII']
12
+ """
13
+
7
14
  super().__init__(ticker, db_client)
8
15
  self.full_ohlcv = self._get_ohlcv()
9
16
  self.basic_indexes = ['SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20',
@@ -40,16 +47,36 @@ class TechFetcher(StatsFetcher):
40
47
  )
41
48
 
42
49
  def _get_ohlcv(self):
43
- query = {'ticker': self.ticker}
44
- ticker_full = list(self.collection.find(query))
50
+
51
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
52
+
53
+ full_tick = f'^{self.ticker}'
54
+ yf_ticker = yf.Ticker(full_tick)
55
+ origin_df = yf_ticker.history(period="10y")
56
+ origin_df = origin_df.reset_index()
57
+ origin_df["Date"] = pd.to_datetime(origin_df["Date"]).dt.date
58
+ df = origin_df.rename(
59
+ columns={
60
+ "Date": "date",
61
+ "Open": "open",
62
+ "High": "high",
63
+ "Low": "low",
64
+ "Close": "close",
65
+ "Volume": "volume"
66
+ }
67
+ )
68
+ else:
69
+
70
+ query = {'ticker': self.ticker}
71
+ ticker_full = list(self.collection.find(query))
45
72
 
46
- if not ticker_full:
47
- raise ValueError(f"No data found for ticker: {self.ticker}")
73
+ if not ticker_full:
74
+ raise ValueError(f"No data found for ticker: {self.ticker}")
48
75
 
49
- if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
50
- raise KeyError("Missing 'daily_data' in the retrieved data")
76
+ if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
77
+ raise KeyError("Missing 'daily_data' in the retrieved data")
51
78
 
52
- df = pd.DataFrame(ticker_full[0]['daily_data'])
79
+ df = pd.DataFrame(ticker_full[0]['daily_data'])
53
80
 
54
81
  selected_cols = ['date','open','high','low','close','volume']
55
82
 
@@ -0,0 +1,339 @@
1
+ from .base import BaseTEJFetcher
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ import pandas as pd
5
+ from pymongo import MongoClient
6
+ from ..utils import StatsProcessor, YoY_Calculator
7
+ import warnings
8
+
9
+
10
class FinanceReportFetcher(BaseTEJFetcher):
    """
    Read per-quarter finance-report data of one ticker from a TEJ collection,
    optionally with quarter-over-quarter or year-over-year growth attached.
    """

    class FetchMode(Enum):
        YOY = 1
        QOQ = 2
        YOY_NOCAL = 3
        QOQ_NOCAL = 4

    def __init__(self, mongo_uri, db_name="company", collection_name="TWN/AINVFQ1"):
        """Connect to ``mongo_uri`` and select ``db_name``/``collection_name``."""
        self.client = MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]

        # Column names accepted in ``indexes``; ``get`` warns about others.
        # yapf: disabled
        self.check_index = {
            'coid', 'mdate', 'key3', 'no', 'sem', 'merg', 'curr', 'annd', 'fin_ind', 'bp11', 'bp21', 'bp22', 'bp31',
            'bp41', 'bp51', 'bp53', 'bp61', 'bp62', 'bp63', 'bp64', 'bp65', 'bf11', 'bf12', 'bf21', 'bf22', 'bf41',
            'bf42', 'bf43', 'bf44', 'bf45', 'bf99', 'bsca', 'bsnca', 'bsta', 'bscl', 'bsncl', 'bstl', 'bsse', 'bslse',
            'debt', 'quick', 'ppe', 'ar', 'ip12', 'ip22', 'ip31', 'ip51', 'iv41', 'if11', 'isibt', 'isni', 'isnip',
            'eps', 'ispsd', 'gm', 'opi', 'nri', 'ri', 'nopi', 'ebit', 'cip31', 'cscfo', 'cscfi', 'cscff', 'person',
            'shares', 'wavg', 'taxrate', 'r104', 'r115', 'r105', 'r106', 'r107', 'r108', 'r201', 'r112', 'r401', 'r402',
            'r403', 'r404', 'r405', 'r408', 'r409', 'r410', 'r502', 'r501', 'r205', 'r505', 'r517', 'r512', 'r509',
            'r608', 'r616', 'r610', 'r607', 'r613', 'r612', 'r609', 'r614', 'r611', 'r307', 'r304', 'r305', 'r306',
            'r316', 'r834'
        }  # yapf: enabled

    @staticmethod
    def _parse_date(value):
        """Parse a "%Y-%m-%d" string into a datetime; pass other values through."""
        if isinstance(value, str):
            return datetime.strptime(value, "%Y-%m-%d")
        return value

    @staticmethod
    def _season_of(moment):
        """Quarter (1-4) of a date: months 1-3 -> 1, ..., months 10-12 -> 4."""
        return (moment.month - 1) // 3 + 1

    def get(
            self,
            ticker,
            fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
            start_date: str = None,
            end_date: str = None,
            report_type: str = "Q",
            indexes: list = None):
        """
        Basic query function.

        Args:
            ticker(str): stock ticker.
            fetch_mode(FetchMode):
                QOQ / QOQ_NOCAL: every quarter between start_date and
                    end_date (QOQ adds growth versus the previous quarter).
                YOY / YOY_NOCAL: one season across the years from start_date
                    on (YOY adds year-over-year growth). The season and the
                    last year come from ``get_latest_data_time(ticker)``,
                    falling back to today; ``end_date`` is not consulted.
            start_date(str): range start, "%Y-%m-%d"; defaults to
                "2005-01-01" with a warning.
            end_date(str): range end, "%Y-%m-%d"; in the QOQ modes it
                defaults to today with a warning.
            report_type(str): report kind {"A", "Q", "TTM"}.
            indexes(list): columns to return; empty or None returns all.

        Returns:
            ``pd.DataFrame`` for the *_NOCAL modes, ``dict`` of
            ``pd.DataFrame`` for QOQ / YOY, ``None`` for any other mode.

        Raises:
            AssertionError: start_date is later than end_date (QOQ modes).
        """
        indexes = indexes or []

        # Warn about requested columns that are not known TEJ column names.
        if (indexes and self.check_index):
            indexes = set(indexes)
            difference = indexes - self.check_index
            if (difference):
                warnings.warn(f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確", UserWarning)

        if (fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}):
            if (not start_date):
                warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
                start_date = "2005-01-01"
            if (not end_date):
                warnings.warn("No end_date specified, use default date = today", UserWarning)
                end_date = datetime.today()

            # Parse first, then compare: the defaults above may already be
            # datetime objects and must not be fed to strptime.
            start_date = self._parse_date(start_date)
            end_date = self._parse_date(end_date)
            if (start_date > end_date):
                # AssertionError kept for compatibility; raised explicitly so
                # the check survives ``python -O``.
                raise AssertionError("start_date must not be later than end_date")

            return self.get_QoQ_data(
                ticker=ticker,
                start_year=start_date.year,
                start_season=self._season_of(start_date),
                end_year=end_date.year,
                end_season=self._season_of(end_date),
                report_type=report_type,
                indexes=indexes,
                use_cal=(fetch_mode == self.FetchMode.QOQ))

        elif (fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}):
            if (not start_date):
                warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
                start_date = "2005-01-01"
            start_date = self._parse_date(start_date)

            end_date = self.get_latest_data_time(ticker)
            if (not end_date):
                end_date = datetime.today()

            return self.get_YoY_data(
                ticker=ticker,
                start_year=start_date.year,
                end_year=end_date.year,
                season=self._season_of(end_date),
                report_type=report_type,
                indexes=indexes,
                use_cal=(fetch_mode == self.FetchMode.YOY))

    def _build_pipeline(self, ticker, period_match, report_type, indexes):
        """Aggregation pipeline: pick the ticker, unwind ``data``, filter periods, project."""
        project_stage = {"data.year": 1, "data.season": 1, "_id": 0}
        if (indexes):  # only the requested columns
            for index in indexes:
                project_stage[f"data.{report_type}.{index}"] = 1
        else:  # nothing requested -> the whole report
            project_stage[f"data.{report_type}"] = 1

        return [
            {"$match": {"ticker": ticker}},
            {"$unwind": "$data"},
            {"$match": period_match},
            {"$project": project_stage},
        ]

    def _fetch_period_dict(self, pipeline, report_type):
        """Run ``pipeline`` and key the ``report_type`` payloads by "<year>Q<season>"."""
        # list(...) works on every PyMongo version; cursor.to_list() does not.
        fetched_data = list(self.collection.aggregate(pipeline))
        return StatsProcessor.list_of_dict_to_dict(
            fetched_data, keys=["year", "season"], delimeter="Q", data_key=report_type)

    def _frames_per_index(self, period_dict):
        """{period: {index: cell}} -> {index: DataFrame}, periods in reversed order."""
        data_df = pd.DataFrame.from_dict(period_dict)
        data_df = data_df.iloc[:, ::-1].T
        return self.get_dict_of_df(data_df.to_dict())

    def get_QoQ_data(
            self, ticker, start_year, start_season, end_year, end_season, report_type="Q", indexes=None,
            use_cal=False):
        """
        Fetch every quarter within the given range.

        With ``use_cal`` the quarter preceding the range is fetched as well so
        the first quarter's growth can be computed; it is removed from the
        result afterwards.

        Returns:
            ``pd.DataFrame`` (columns are periods, reversed from the stored
            order) when ``use_cal`` is False, otherwise ``dict`` mapping each
            index to a DataFrame holding its ``value`` and ``growth`` rows.
        """
        if (start_year == end_year):
            # Both season bounds must hold at once; OR-ing them as separate
            # clauses would return the entire year.
            period_clauses = [{
                "data.year": start_year,
                "data.season": {"$gte": start_season, "$lte": end_season}
            }]
        else:
            period_clauses = [
                {"data.year": {"$gt": start_year, "$lt": end_year}},
                {"data.year": start_year, "data.season": {"$gte": start_season}},
                {"data.year": end_year, "data.season": {"$lte": end_season}},
            ]

        lower_bound_label = None
        if (use_cal):
            if (start_season == 1):
                lower_bound_year = start_year - 1
                lower_bound_season = 4
            else:
                lower_bound_year = start_year
                lower_bound_season = start_season - 1
            lower_bound_label = f"{lower_bound_year}Q{lower_bound_season}"
            period_clauses.append({"data.year": lower_bound_year, "data.season": lower_bound_season})

        pipeline = self._build_pipeline(ticker, {"$or": period_clauses}, report_type, indexes)
        data_dict = self._fetch_period_dict(pipeline, report_type)

        if (use_cal):
            data_with_QoQ = self.cal_QoQ(data_dict)
            # Drop the helper quarter by label, so a real quarter is never
            # discarded when the helper quarter is absent from the database.
            data_with_QoQ.pop(lower_bound_label, None)
            return self._frames_per_index(data_with_QoQ)

        data_df = pd.DataFrame.from_dict(data_dict)
        return data_df.iloc[:, ::-1]

    def get_YoY_data(self, ticker, start_year, end_year, season, report_type="Q", indexes=None, use_cal=False):
        """
        Fetch one season across the years ``start_year``..``end_year``.

        With ``use_cal`` the years 1, 3, 5 and 10 before each requested year
        are fetched too, as the bases of the growth figures.

        Returns:
            ``pd.DataFrame`` (columns are periods, reversed from the stored
            order) when ``use_cal`` is False, otherwise ``dict`` mapping each
            index to a DataFrame holding ``value`` and ``YoY_*`` rows.
        """
        if (use_cal):
            select_year = sorted(
                {year - shift for year in range(start_year, end_year + 1) for shift in (0, 1, 3, 5, 10)},
                reverse=True)
        else:
            select_year = list(range(start_year, end_year + 1))

        period_match = {"data.year": {"$in": select_year}, "data.season": {"$eq": season}}
        pipeline = self._build_pipeline(ticker, period_match, report_type, indexes)
        data_dict = self._fetch_period_dict(pipeline, report_type)

        if (use_cal):
            data_with_YoY = self.cal_YoY(data_dict, start_year, end_year, season)
            return self._frames_per_index(data_with_YoY)

        data_df = pd.DataFrame.from_dict(data_dict)
        return data_df.iloc[:, ::-1]
@@ -12,51 +12,68 @@ class ValueFetcher(StatsFetcher):
12
12
  def prepare_query(self, start_date, end_date):
13
13
  pipeline = super().prepare_query()
14
14
 
15
- pipeline.append({
16
- "$project": {
17
- "_id": 0,
18
- "ticker": 1,
19
- "company_name": 1,
20
- "daily_data": {
21
- "$map": {
22
- "input": {
23
- "$filter": {
24
- "input": "$daily_data",
25
- "as": "daily",
26
- "cond": {
27
- "$and": [{
28
- "$gte": ["$$daily.date", start_date]
29
- }, {
30
- "$lte": ["$$daily.date", end_date]
31
- }]
32
- }
33
- }
34
- },
35
- "as": "daily_item",
36
- "in": {
37
- "date": "$$daily_item.date",
38
- "close": "$$daily_item.close",
39
- "P_B": "$$daily_item.P_B",
40
- "P_E": "$$daily_item.P_E",
41
- "P_FCF": "$$daily_item.P_FCF",
42
- "P_S": "$$daily_item.P_S",
43
- "EV_OPI": "$$daily_item.EV_OPI",
44
- "EV_EBIT": "$$daily_item.EV_EBIT",
45
- "EV_EBITDA": "$$daily_item.EV_EBITDA",
46
- "EV_S": "$$daily_item.EV_S"
47
- }
15
+ pipeline.append(
16
+ {
17
+ "$project":
18
+ {
19
+ "_id": 0,
20
+ "ticker": 1,
21
+ "company_name": 1,
22
+ "daily_data":
23
+ {
24
+ "$map":
25
+ {
26
+ "input":
27
+ {
28
+ "$filter":
29
+ {
30
+ "input": "$daily_data",
31
+ "as": "daily",
32
+ "cond":
33
+ {
34
+ "$and":
35
+ [
36
+ {
37
+ "$gte": ["$$daily.date", start_date]
38
+ }, {
39
+ "$lte": ["$$daily.date", end_date]
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ },
45
+ "as": "daily_item",
46
+ "in":
47
+ {
48
+ "date": "$$daily_item.date",
49
+ "close": "$$daily_item.close",
50
+ "P_B": "$$daily_item.P_B",
51
+ "P_E": "$$daily_item.P_E",
52
+ "P_FCF": "$$daily_item.P_FCF",
53
+ "P_S": "$$daily_item.P_S",
54
+ "EV_OPI": "$$daily_item.EV_OPI",
55
+ "EV_EBIT": "$$daily_item.EV_EBIT",
56
+ "EV_EBITDA": "$$daily_item.EV_EBITDA",
57
+ "EV_S": "$$daily_item.EV_S"
58
+ }
59
+ }
60
+ },
61
+ "yearly_data": 1
48
62
  }
49
- },
50
- "yearly_data": 1
51
- }
52
- })
63
+ })
53
64
 
54
65
  return pipeline
55
66
 
67
def collect_data(self, start_date, end_date):
    """
    Run the aggregation built by ``prepare_query(start_date, end_date)`` on
    ``self.collection`` and return its first result document.

    Raises IndexError when the aggregation yields no document.
    """
    documents = list(self.collection.aggregate(self.prepare_query(start_date, end_date)))
    return documents[0]
73
+
56
74
  def query_data(self):
57
75
  try:
58
- latest_time = StatsDateTime.get_latest_time(
59
- self.ticker, self.collection)['last_update_time']
76
+ latest_time = StatsDateTime.get_latest_time(self.ticker, self.collection)['last_update_time']
60
77
  target_year = latest_time['daily_data']['last_update'].year
61
78
  start_date = latest_time['daily_data']['last_update'] - timedelta(days=31)
62
79
  end_date = latest_time['daily_data']['last_update']
@@ -79,7 +96,7 @@ class ValueFetcher(StatsFetcher):
79
96
  )
80
97
 
81
98
  return fetched_data
82
-
99
+
83
100
  def query_value_serie(self):
84
101
  """
85
102
  回傳指定公司的歷來評價
@@ -104,28 +121,32 @@ class ValueFetcher(StatsFetcher):
104
121
  }
105
122
  },
106
123
  {
107
- "$project": {
108
- "_id": 0,
109
- "ticker": 1,
110
- "company_name": 1,
111
- "daily_data": {
112
- "$map": {
113
- "input": "$daily_data", # 正確地指定要處理的陣列
114
- "as": "daily", # 每個元素的名稱
115
- "in": {
116
- "date": "$$daily.date",
117
- "P_E": "$$daily.P_E",
118
- "P_FCF": "$$daily.P_FCF",
119
- "P_B": "$$daily.P_B",
120
- "P_S": "$$daily.P_S",
121
- "EV_OPI": "$$daily.EV_OPI",
122
- "EV_EBIT": "$$daily.EV_EBIT",
123
- "EV_EBITDA": "$$daily.EV_EBITDA",
124
- "EV_S": "$$daily.EV_S"
124
+ "$project":
125
+ {
126
+ "_id": 0,
127
+ "ticker": 1,
128
+ "company_name": 1,
129
+ "daily_data":
130
+ {
131
+ "$map":
132
+ {
133
+ "input": "$daily_data", # 正確地指定要處理的陣列
134
+ "as": "daily", # 每個元素的名稱
135
+ "in":
136
+ {
137
+ "date": "$$daily.date",
138
+ "P_E": "$$daily.P_E",
139
+ "P_FCF": "$$daily.P_FCF",
140
+ "P_B": "$$daily.P_B",
141
+ "P_S": "$$daily.P_S",
142
+ "EV_OPI": "$$daily.EV_OPI",
143
+ "EV_EBIT": "$$daily.EV_EBIT",
144
+ "EV_EBITDA": "$$daily.EV_EBITDA",
145
+ "EV_S": "$$daily.EV_S"
146
+ }
147
+ }
125
148
  }
126
- }
127
149
  }
128
- }
129
150
  }
130
151
  ]
131
152
 
@@ -133,21 +154,17 @@ class ValueFetcher(StatsFetcher):
133
154
  fetched_data = fetched_data[0]
134
155
 
135
156
  value_keys = ["P_E", "P_FCF", "P_B", "P_S", "EV_OPI", "EV_EBIT", "EV_EVITDA", "EV_S"]
136
- return_dict = {
137
- value_key: dict() for value_key in value_keys
138
- }
157
+ return_dict = {value_key: dict() for value_key in value_keys}
139
158
 
140
159
  for value_key in value_keys:
141
160
  for data in fetched_data['daily_data']:
142
161
  if (value_key not in data.keys()):
143
162
  continue
144
163
  else:
145
- return_dict[value_key].update({
146
- data['date']: data[value_key]
147
- })
164
+ return_dict[value_key].update({data['date']: data[value_key]})
148
165
 
149
166
  return_dict = {
150
- value_key: pd.DataFrame.from_dict(value_dict, orient = 'index', columns = [value_key])
167
+ value_key: pd.DataFrame.from_dict(value_dict, orient='index', columns=[value_key])
151
168
  for value_key, value_dict in return_dict.items()
152
169
  }
153
170
  return return_dict
@@ -1,4 +1,5 @@
1
1
  from .datetime import StatsDateTime
2
2
  from .db_client import DBClient
3
3
  from .data_process import StatsProcessor
4
- from .fetcher import StatsFetcher
4
+ from .fetcher import StatsFetcher
5
+ from .calculate_value import YoY_Calculator
@@ -0,0 +1,26 @@
1
class YoY_Calculator:
    """Growth-rate helper shared by the fetchers."""

    def __init__(self):
        pass

    @classmethod
    def cal_growth(cls, target_value: float, past_value: float, delta: int):
        """
        Compute the growth rate, annualized when ``delta > 1``.

        Args:
            target_value: value at the current point in time.
            past_value: value at the earlier point in time.
            delta: number of years/quarters between the two values.

        Returns:
            ``(target - past) / past`` when ``delta <= 1``, otherwise
            ``(target / past) ** (1 / delta) - 1``. ``None`` when the rate
            cannot be computed: a non-numeric operand, a zero ``past_value``,
            an overflow, or a complex result.
        """
        try:
            if (delta > 1):
                growth = ((target_value / past_value)**(1 / delta)) - 1
            else:
                growth = ((target_value - past_value) / past_value)
        except (TypeError, ValueError, ArithmeticError):
            # ArithmeticError covers ZeroDivisionError and OverflowError.
            return None

        # A negative ratio raised to a fractional power yields a complex
        # number, which is not a meaningful growth rate.
        if (isinstance(growth, complex)):
            return None

        return growth
@@ -20,7 +20,6 @@ class StatsProcessor:
20
20
  """
21
21
  1. 讀檔: txt / yaml
22
22
  2. 將巢狀dictionary / DataFrame扁平化
23
-
24
23
  """
25
24
 
26
25
  @classmethod
@@ -215,3 +214,59 @@ class StatsProcessor:
215
214
  return int(np.round(value).item())
216
215
  else:
217
216
  return value
217
+
218
@classmethod
def list_of_dict_to_dict(
    cls,
    data_list: list,
    key: str = "",
    keys: list = None,
    delimeter: str = "_",
    data_key: str = "Q"
):
    """
    For the TEJ DB.
    List[Dict] -> Dict[Dict]

    input:
        data_list(List):
            [
                { "data":
                    {
                        "year": 2021...
                        "season": 1,
                        "Q": {}...
                    }
                }
            ]

        key(str): the single field whose value becomes the new index;
            takes precedence over ``keys`` when both are given
        keys(list): several fields whose values are joined into the index
        delimeter(str): separator used when joining several keys
        data_key(str): field of each ``data`` entry kept as the value
    return:
        {
            "2021" : {# data under Q} ...
        }

        or (keys = ['year', 'season'], delimeter = 'Q')
        {
            "2021Q2" : {}
        }

    The entries of ``data_list`` are only read, never modified.

    raises:
        AssertionError: neither ``key`` nor ``keys`` is given, or an index
            field is missing from an entry. Raised explicitly (instead of
            via ``assert``) so the checks survive ``python -O``.
        KeyError: an entry lacks ``'data'`` or ``data_key``.
    """
    index_fields = [key] if key else list(keys or [])
    if not index_fields:
        raise AssertionError("func list_of_dict_to_dict must have argument \"key\" or \"keys\"")

    return_dict = {}
    for item in data_list:
        record = item['data']

        missing = [field for field in index_fields if field not in record]
        if missing:
            raise AssertionError(f"index field(s) {missing} not found in data entry")

        label = delimeter.join(str(record[field]) for field in index_fields)
        return_dict[label] = record[data_key]

    return return_dict
@@ -1,12 +1,18 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neurostats_API
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
7
7
  Author-email: jason@neurowatt.ai
8
8
  Requires-Python: >=3.6
9
9
  Description-Content-Type: text/markdown
10
+ Requires-Dist: numpy>=2.1.0
11
+ Requires-Dist: pandas>=2.2.0
12
+ Requires-Dist: pymongo
13
+ Requires-Dist: pytz
14
+ Requires-Dist: python-dotenv
15
+ Requires-Dist: yfinance
10
16
 
11
17
  # neurostats_API
12
18
 
@@ -83,7 +89,7 @@ pip install neurostats-API
83
89
  ```Python
84
90
  >>> import neurostats_API
85
91
  >>> print(neurostats_API.__version__)
86
- 0.0.14
92
+ 0.0.16
87
93
  ```
88
94
 
89
95
  ### 得到最新一期的評價資料與歷年評價
@@ -667,7 +673,121 @@ fetcher.query()
667
673
 
668
674
  請注意`range`, `last_range`, `52week_range`這三個項目型態為字串,其餘為float
669
675
 
676
+
677
+ ## TEJ 相關
678
+ ### 會計師簽證財務資料
679
+ ```Python
680
+ from neurostats_API import FinanceReportFetcher
681
+
682
+ mongo_uri = <MongoDB 的 URI>
683
+ db_name = 'company' # 連接的DB名稱
684
+ collection_name = "TWN/AINVFQ1" # 連接的collection對象
685
+
686
+ fetcher = FinanceReportFetcher(
687
+ mongo_uri = mongo_uri,
688
+ db_name = db_name,
689
+ collection_name = collection_name
690
+ )
691
+
692
+ data = fetcher.get(
693
+ ticker = "2330", # 任意的股票代碼
694
+ fetch_mode = fetcher.FetchMode.QOQ_NOCAL, # 取得模式
695
+ start_date = "2005-01-01",
696
+ end_date = "2024-12-31",
697
+ report_type = "Q",
698
+ indexes = []
699
+ ) # -> pd.DataFrame or Dict[pd.DataFrame]
700
+ ```
701
+ - `ticker`: 股票代碼
702
+
703
+ - `fetch_mode` : 取得模式,為`fetcher.FetchMode`中的`YOY_NOCAL`、`QOQ_NOCAL`、`QOQ` 或 `YOY`
704
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date同季的歷年資料,時間範圍以start_date為起始
705
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
706
+
707
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
708
+
709
+ - `QOQ`: 時間範圍內每季的每個index的數值以及QoQ
710
+
711
+ - `YOY`: 以end_date為準,取得與end_date同季的歷年資料以及成長率,時間範圍以start_date為起始
712
+
713
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
714
+
715
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
716
+
717
+ - `report_type`: 選擇哪種報告,預設為`Q`
718
+ - `A`: 當年累計
719
+ - `Q`: 當季數值
720
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
721
+
722
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
723
+ - 範例輸入: `['bp41', 'bp51']`
724
+
725
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
726
+ 請看 `會計師簽證財務資料`
727
+
728
+ #### 回傳資料
729
+ ##### `YOY_NOCAL` 與 `QOQ_NOCAL`
730
+ 為回傳`pd.DataFrame`,column名稱為<年份>Q<季>, row名稱為指定財報項目
731
+ ```Python
732
+ # fetch_mode = fetcher.FetchMode.QOQ_NOCAL
733
+ 2024Q3 2024Q2 2024Q1
734
+ bp41 7.082005e+07 6.394707e+07 5.761001e+07
735
+ bp51 3.111298e+09 3.145373e+09 3.091985e+09
736
+
737
+ # fetch_mode = fetcher.FetchMode.YOY_NOCAL
738
+ 2024Q3 2023Q3 2022Q3
739
+ bp41 7.082005e+07 5.377231e+07 6.201822e+07
740
+ bp51 3.111298e+09 3.173919e+09 2.453840e+09
741
+ ```
742
+
743
+ ##### `YOY` 與 `QOQ`
744
+ 回傳為`Dict[pd.DataFrame]`, key 為指定的index, DataFrame中則是該index歷年的數值與成長率
745
+ ```Python
746
+ # fetch_mode = fetcher.FetchMode.QOQ
747
+ {
748
+ 'bp41':
749
+ 2024Q3 2024Q2 2024Q1
750
+ value 7.082005e+07 6.394707e+07 5.761001e+07
751
+ growth 1.074791e-01 1.099994e-01 5.532101e-03,
752
+ 'bp51':
753
+ 2024Q3 2024Q2 2024Q1
754
+ value 3.111298e+09 3.145373e+09 3.091985e+09
755
+ growth -1.083335e-02 1.726663e-02 -4.159542e-03
756
+ }
757
+
758
+ # fetch_mode = fetcher.FetchMode.YOY
759
+ {
760
+ 'bp41':
761
+ 2024Q3 2023Q3 2022Q3
762
+ value 7.082005e+07 5.377231e+07 6.201822e+07
763
+ YoY_1 NaN NaN 4.130744e-01
764
+ YoY_3 1.729171e-01 9.556684e-02 1.883274e-01
765
+ YoY_5 1.389090e-01 1.215242e-01 1.642914e-01
766
+ YoY_10 1.255138e-01 1.356297e-01 1.559702e-01,
767
+ 'bp51':
768
+ 2024Q3 2023Q3 2022Q3
769
+ value 3.111298e+09 3.173919e+09 2.453840e+09
770
+ YoY_1 NaN NaN 3.179539e-01
771
+ YoY_3 1.866752e-01 2.766851e-01 2.638677e-01
772
+ YoY_5 2.068132e-01 2.479698e-01 1.815106e-01
773
+ YoY_10 1.420500e-01 1.586797e-01 1.551364e-01
774
+ }
775
+ ```
776
+
777
+
670
778
  ## 版本紀錄
779
+ ## 0.0.16
780
+ - 處理ValueFetcher的error #issue76
781
+
782
+ - tej_fetcher新增 QOQ, YOY功能
783
+
784
+ ## 0.0.15
785
+ - TechFetcher中新增指數條件
786
+
787
+ - 新增tej_fetcher索取TEJ相關的資料
788
+
789
+ - package新增dependency,可以安裝需要的相關package
790
+
671
791
  ## 0.0.14
672
792
  - 修改部分財報資料錯誤的乘以1000的問題
673
793
 
@@ -1,28 +1,30 @@
1
- neurostats_API/__init__.py,sha256=zDrZHoj7CyPMzwVv9Yxva_-e95Gd1sRJSB99gLu9kXU,20
1
+ neurostats_API/__init__.py,sha256=5ToELVqNOIdVJrMj5G8JvbyRIjvo1FxcP6e-a-iMe1Y,261
2
2
  neurostats_API/cli.py,sha256=UJSWLIw03P24p-gkBb6JSEI5dW5U12UvLf1L8HjQD-o,873
3
3
  neurostats_API/main.py,sha256=QcsfmWivg2Dnqw3MTJWiI0QvEiRs0VuH-BjwQHFCv00,677
4
- neurostats_API/fetchers/__init__.py,sha256=ylYEySHQxcAhUUWEOCGZfmaAg7Mir5MfuEhOjk3POJg,406
4
+ neurostats_API/fetchers/__init__.py,sha256=B4aBwVzf_X-YieEf3fZteU0qmBPVIB9VjrmkyWhLK18,489
5
5
  neurostats_API/fetchers/balance_sheet.py,sha256=sQv4Gk5uoKURLEdh57YknOQWiyVwaXJ2Mw75jxNqUS0,5804
6
- neurostats_API/fetchers/base.py,sha256=NW2SFzrimyAIrdJx1LVmTazelyZOAtcj54kJKHc4Vaw,1662
6
+ neurostats_API/fetchers/base.py,sha256=rcrKW2PTJYfBsxsiGpCYiVTK9pQW4aALYLixIjvNMUk,4890
7
7
  neurostats_API/fetchers/cash_flow.py,sha256=TY7VAWVXkj5-mzH5Iu0sIE-oV8MvGmmDy0URNotNV1E,7614
8
8
  neurostats_API/fetchers/finance_overview.py,sha256=PxUdWY0x030olYMLcCHDBn068JLmCE2RTOce1dxs5vM,27753
9
9
  neurostats_API/fetchers/institution.py,sha256=UrcBc6t7u7CnEwUsf6YmLbbJ8VncdWpq8bCz17q2dgs,11168
10
10
  neurostats_API/fetchers/margin_trading.py,sha256=lQImtNdvaBoSlKhJvQ3DkH3HjSSgKRJz4ZZpyR5-Z4I,10433
11
11
  neurostats_API/fetchers/month_revenue.py,sha256=nixX2llzjCFr2m2YVjxrSfkBusnZPrPb2dRDq1XLGhw,4251
12
12
  neurostats_API/fetchers/profit_lose.py,sha256=EN9Y0iamcAaHMZdjHXO6b_2buLnORssf8ZS7A0hi74s,5896
13
- neurostats_API/fetchers/tech.py,sha256=wH1kkqiETQhF0HAhk-UIiucnZ3EiL85Q-yMWCcVOiFM,11395
14
- neurostats_API/fetchers/value_invest.py,sha256=_eQxuEnIYvksb06QHixGK29Gnwr_3xmI6Tu7dv4J__E,5769
13
+ neurostats_API/fetchers/tech.py,sha256=Hol1bcwJ_ERcnoTXNWlqqaWOuzdl7MeiAjCvzQMZDTg,12269
14
+ neurostats_API/fetchers/tej_finance_report.py,sha256=laXph2ca1LCFocZjjdvtzmm5fcUecHk2Gs5h6-XMSWY,12967
15
+ neurostats_API/fetchers/value_invest.py,sha256=b_x2Dpgs8VBU5HdG8ocKtfIEkqhU-Q0S5n6RxuFuM2g,7467
15
16
  neurostats_API/tools/balance_sheet.yaml,sha256=6XygNG_Ybb1Xkk1e39LMLKr7ATvaCP3xxuwFbgNl6dA,673
16
17
  neurostats_API/tools/cash_flow_percentage.yaml,sha256=fk2Z4eb1JjGFvP134eJatHacB7BgTkBenhDJr83w8RE,1345
17
18
  neurostats_API/tools/finance_overview_dict.yaml,sha256=B9nV75StXkrF3yv2-eezzitlJ38eEK86RD_VY6588gQ,2884
18
19
  neurostats_API/tools/profit_lose.yaml,sha256=iyp9asYJ04vAxk_HBUDse_IBy5oVvYHpwsyACg5YEeg,3029
19
20
  neurostats_API/tools/seasonal_data_field_dict.txt,sha256=X8yc_el6p8BH_3FikTqBVFGsvWdXT6MHXLfKfi44334,8491
20
- neurostats_API/utils/__init__.py,sha256=FTYKRFzW2XVXdnSHXnS3mQQaHlKF9xGqrMsgZZ2kroc,142
21
- neurostats_API/utils/data_process.py,sha256=YKfk3fXkcmwFS_8YxOV2uRLnt9NX3cYPV_XxrCgk8Yo,7597
21
+ neurostats_API/utils/__init__.py,sha256=0tJCRmlJq2aDwcNNW-oEaA9H0OxTJMFvjpVYtG4AvZU,186
22
+ neurostats_API/utils/calculate_value.py,sha256=lUKSsWU76XRmDUcmi4eDjoQxjb3vWpAAKInF9w49VNI,782
23
+ neurostats_API/utils/data_process.py,sha256=A--dzOsu42jRxqqCD41gTtjE5rhEBYmhB6y-AnCvo5U,8986
22
24
  neurostats_API/utils/datetime.py,sha256=XJya4G8b_-ZOaBbMXgQjWh2MC4wc-o6goQ7EQJQMWrQ,773
23
25
  neurostats_API/utils/db_client.py,sha256=OYe6yazcR4Aa6jYmy47JrryUeh2NnKGqY2K_lSZe6i8,455
24
26
  neurostats_API/utils/fetcher.py,sha256=VbrUhjA-GG5AyjPX2SHtFIbZM4dm3jo0RgZzuCbb_Io,40927
25
- neurostats_API-0.0.14.dist-info/METADATA,sha256=MeV1goaFbTQI0ddvr-TvElrggROHEiAixNCqP_gyLkI,25935
26
- neurostats_API-0.0.14.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
27
- neurostats_API-0.0.14.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
28
- neurostats_API-0.0.14.dist-info/RECORD,,
27
+ neurostats_API-0.0.16.dist-info/METADATA,sha256=9US1mdwWnOCAnwfsOj-ZLRCfo07p3yd0UfwMKS6989g,29848
28
+ neurostats_API-0.0.16.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
29
+ neurostats_API-0.0.16.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
30
+ neurostats_API-0.0.16.dist-info/RECORD,,