neurostats-API 0.0.15__py3-none-any.whl → 0.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- __version__='0.0.15'
1
+ __version__='0.0.16'
2
2
 
3
3
  from .fetchers import (
4
4
  BalanceSheetFetcher,
@@ -7,10 +7,12 @@ from datetime import datetime, timedelta, date
7
7
  from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
8
8
  import yaml
9
9
 
10
+
10
11
  class StatsFetcher:
12
+
11
13
  def __init__(self, ticker, db_client):
12
14
  self.ticker = ticker
13
- self.db = db_client["company"] # Replace with your database name
15
+ self.db = db_client["company"] # Replace with your database name
14
16
  self.collection = self.db["twse_stats"]
15
17
 
16
18
  self.timezone = pytz.timezone("Asia/Taipei")
@@ -26,7 +28,6 @@ class StatsFetcher:
26
28
  'grand_total_growth': [f"YoY_{i}" for i in [1, 3, 5, 10]]
27
29
  }
28
30
 
29
-
30
31
  def prepare_query(self):
31
32
  return [
32
33
  {
@@ -52,8 +53,27 @@ class StatsFetcher:
52
53
  season = (month - 1) // 3 + 1
53
54
 
54
55
  return StatsDateTime(date, year, month, day, season)
56
+
57
+ def has_required_columns(self, df:pd.DataFrame, required_cols=None):
58
+ """
59
+ Check if the required columns are present in the DataFrame.
60
+
61
+ Args:
62
+ df (pd.DataFrame): The DataFrame to check.
63
+ required_cols (list, optional): List of required column names.
64
+ Defaults to ['date', 'open', 'high', 'low', 'close', 'volume'].
65
+
66
+ Returns:
67
+ bool: True if all required columns are present, False otherwise.
68
+ """
69
+ if required_cols is None:
70
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
71
+
72
+ return all(col in df.columns for col in required_cols)
73
+
55
74
 
56
75
  class BaseTEJFetcher(abc.ABC):
76
+
57
77
  def __init__(self):
58
78
  self.client = None
59
79
  self.db = None
@@ -62,25 +82,22 @@ class BaseTEJFetcher(abc.ABC):
62
82
  @abc.abstractmethod
63
83
  def get(self):
64
84
  pass
65
-
85
+
66
86
  def get_latest_data_time(self, ticker):
67
- latest_data = self.collection.find_one(
68
- {"ticker": ticker},
69
- {"last_update": 1, "_id" : 0}
70
- )
87
+ latest_data = self.collection.find_one({"ticker": ticker}, {"last_update": 1, "_id": 0})
71
88
 
72
89
  try:
73
90
  latest_date = latest_data['last_update']["latest_data_date"]
74
91
  except Exception as e:
75
92
  latest_date = None
76
-
93
+
77
94
  return latest_date
78
95
 
79
- def cal_YoY(self, data_dict: dict, start_year: int, end_year: int):
80
- year_shifts = [1,3,5,10]
96
+ def cal_YoY(self, data_dict: dict, start_year: int, end_year: int, season: int):
97
+ year_shifts = [1, 3, 5, 10]
81
98
  return_dict = {}
82
- for year in range(start_year, end_year+1):
83
- year_data = data_dict[str(year)]
99
+ for year in range(start_year, end_year + 1):
100
+ year_data = data_dict[f"{year}Q{season}"]
84
101
  year_keys = list(year_data.keys())
85
102
  for key in year_keys:
86
103
  if (key in 'season'):
@@ -93,23 +110,20 @@ class BaseTEJFetcher(abc.ABC):
93
110
  this_value = year_data[key]
94
111
  try:
95
112
  past_year = str(year - shift)
96
- last_value = data_dict[past_year][key]['value']
97
- temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(
98
- this_value, last_value, delta = shift
99
- )
113
+ last_value = data_dict[f"{past_year}Q{season}"][key]
114
+ temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(this_value, last_value, delta=shift)
100
115
  except Exception as e:
101
116
  temp_dict[f"YoY_{shift}"] = None
102
-
117
+
103
118
  year_data[key] = temp_dict
104
119
 
105
120
  else:
106
121
  year_data.pop(key)
107
-
108
- return_dict[year] = year_data
109
-
110
-
122
+
123
+ return_dict[f"{year}Q{season}"] = year_data
124
+
111
125
  return return_dict
112
-
126
+
113
127
  def cal_QoQ(self, data_dict):
114
128
  return_dict = {}
115
129
  for i, time_index in enumerate(data_dict.keys()):
@@ -122,7 +136,7 @@ class BaseTEJFetcher(abc.ABC):
122
136
  else:
123
137
  last_year = year
124
138
  last_season = season - 1
125
-
139
+
126
140
  this_data = data_dict[time_index]
127
141
  this_keys = list(this_data.keys())
128
142
  for key in this_keys:
@@ -137,16 +151,21 @@ class BaseTEJFetcher(abc.ABC):
137
151
  try:
138
152
  last_value = data_dict[f"{last_year}Q{last_season}"][key]['value']
139
153
 
140
- temp_dict['growth'] = YoY_Calculator.cal_growth(
141
- this_value, last_value, delta=1
142
- )
154
+ temp_dict['growth'] = YoY_Calculator.cal_growth(this_value, last_value, delta=1)
143
155
  except Exception as e:
144
156
  temp_dict['growth'] = None
145
-
157
+
146
158
  this_data[key] = temp_dict
147
159
 
148
160
  else:
149
161
  this_data.pop(key)
150
162
  return_dict[time_index] = this_data
151
163
  return return_dict
152
-
164
+
165
+ def get_dict_of_df(self, data_dict):
166
+ """
167
+ dict[dict] -> dict[df]
168
+ """
169
+ for key in data_dict.keys():
170
+ data_dict[key] = pd.DataFrame.from_dict(data_dict[key])
171
+ return data_dict
@@ -47,40 +47,46 @@ class TechFetcher(StatsFetcher):
47
47
  )
48
48
 
49
49
  def _get_ohlcv(self):
50
-
51
- if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
52
-
53
- full_tick = f'^{self.ticker}'
54
- yf_ticker = yf.Ticker(full_tick)
55
- origin_df = yf_ticker.history(period="10y")
56
- origin_df = origin_df.reset_index()
57
- origin_df["Date"] = pd.to_datetime(origin_df["Date"]).dt.date
58
- df = origin_df.rename(
59
- columns={
60
- "Date": "date",
61
- "Open": "open",
62
- "High": "high",
63
- "Low": "low",
64
- "Close": "close",
65
- "Volume": "volume"
66
- }
67
- )
68
- else:
69
50
 
51
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
52
+
53
+ try:
70
54
  query = {'ticker': self.ticker}
71
- ticker_full = list(self.collection.find(query))
55
+ ticker_full = self.collection.find_one(query)
72
56
 
73
57
  if not ticker_full:
74
58
  raise ValueError(f"No data found for ticker: {self.ticker}")
75
59
 
76
- if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
77
- raise KeyError("Missing 'daily_data' in the retrieved data")
60
+ daily_data = ticker_full.get("daily_data", [])
61
+ if not isinstance(daily_data, list):
62
+ raise TypeError("Expected 'daily_data' to be a list.")
63
+
64
+ df = pd.DataFrame(daily_data)
78
65
 
79
- df = pd.DataFrame(ticker_full[0]['daily_data'])
66
+ if not self.has_required_columns(df, required_cols):
67
+ raise KeyError(f"Missing required columns")
80
68
 
81
- selected_cols = ['date','open','high','low','close','volume']
69
+ except (KeyError, ValueError, TypeError) as e:
70
+
71
+ print(f"Conduct yf searching")
72
+
73
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
74
+ full_tick = f'^{self.ticker}'
75
+ else:
76
+ full_tick = f'{self.ticker}.tw'
77
+
78
+ df = self.conduct_yf_search(full_tick)
79
+
80
+ if not self.has_required_columns(df, required_cols):
81
+
82
+ print(f".tw failed, try .two")
83
+
84
+ full_tick = f'{self.ticker}.two'
85
+
86
+ df = self.conduct_yf_search(full_tick)
87
+
88
+ return df[required_cols]
82
89
 
83
- return df[selected_cols]
84
90
 
85
91
  def get_daily(self):
86
92
 
@@ -101,6 +107,29 @@ class TechFetcher(StatsFetcher):
101
107
  def get_yearly(self):
102
108
 
103
109
  return self.yearly_index
110
+
111
+ def conduct_yf_search(self, ticker:str):
112
+
113
+ yf_ticker = yf.Ticker(ticker)
114
+ origin_df = yf_ticker.history(period="10y")
115
+
116
+ if origin_df.empty:
117
+ return origin_df
118
+
119
+ origin_df = origin_df.reset_index()
120
+ origin_df["Date"] = pd.to_datetime(origin_df["Date"])
121
+ df = origin_df.rename(
122
+ columns={
123
+ "Date": "date",
124
+ "Open": "open",
125
+ "High": "high",
126
+ "Low": "low",
127
+ "Close": "close",
128
+ "Volume": "volume"
129
+ }
130
+ )
131
+
132
+ return df
104
133
 
105
134
  class TechProcessor:
106
135
 
@@ -8,56 +8,39 @@ import warnings
8
8
 
9
9
 
10
10
  class FinanceReportFetcher(BaseTEJFetcher):
11
+
11
12
  class FetchMode(Enum):
12
13
  YOY = 1
13
14
  QOQ = 2
14
15
  YOY_NOCAL = 3
15
16
  QOQ_NOCAL = 4
16
17
 
17
- def __init__(self, mongo_uri, db_name = "company", collection_name = "TWN/AINVFQ1"):
18
+ def __init__(self, mongo_uri, db_name="company", collection_name="TWN/AINVFQ1"):
18
19
  self.client = MongoClient(mongo_uri)
19
20
  self.db = self.client[db_name]
20
21
  self.collection = self.db[collection_name]
21
22
 
23
+ # yapf: disabled
22
24
  self.check_index = {
23
- 'coid', 'mdate', 'key3', 'no',
24
- 'sem', 'merg', 'curr', 'annd',
25
- 'fin_ind', 'bp11', 'bp21', 'bp22',
26
- 'bp31', 'bp41', 'bp51', 'bp53',
27
- 'bp61', 'bp62', 'bp63', 'bp64',
28
- 'bp65', 'bf11', 'bf12', 'bf21',
29
- 'bf22', 'bf41', 'bf42', 'bf43',
30
- 'bf44', 'bf45', 'bf99', 'bsca',
31
- 'bsnca', 'bsta','bscl','bsncl',
32
- 'bstl','bsse','bslse','debt',
33
- 'quick','ppe','ar','ip12',
34
- 'ip22','ip31','ip51','iv41',
35
- 'if11','isibt','isni','isnip',
36
- 'eps','ispsd','gm','opi',
37
- 'nri','ri','nopi','ebit',
38
- 'cip31','cscfo','cscfi','cscff',
39
- 'person','shares','wavg','taxrate',
40
- 'r104','r115','r105','r106',
41
- 'r107','r108','r201','r112',
42
- 'r401','r402','r403','r404',
43
- 'r405','r408','r409','r410',
44
- 'r502','r501','r205','r505',
45
- 'r517','r512','r509','r608',
46
- 'r616','r610','r607','r613',
47
- 'r612','r609','r614','r611',
48
- 'r307','r304','r305','r306',
49
- 'r316','r834'
50
- }
25
+ 'coid', 'mdate', 'key3', 'no', 'sem', 'merg', 'curr', 'annd', 'fin_ind', 'bp11', 'bp21', 'bp22', 'bp31',
26
+ 'bp41', 'bp51', 'bp53', 'bp61', 'bp62', 'bp63', 'bp64', 'bp65', 'bf11', 'bf12', 'bf21', 'bf22', 'bf41',
27
+ 'bf42', 'bf43', 'bf44', 'bf45', 'bf99', 'bsca', 'bsnca', 'bsta', 'bscl', 'bsncl', 'bstl', 'bsse', 'bslse',
28
+ 'debt', 'quick', 'ppe', 'ar', 'ip12', 'ip22', 'ip31', 'ip51', 'iv41', 'if11', 'isibt', 'isni', 'isnip',
29
+ 'eps', 'ispsd', 'gm', 'opi', 'nri', 'ri', 'nopi', 'ebit', 'cip31', 'cscfo', 'cscfi', 'cscff', 'person',
30
+ 'shares', 'wavg', 'taxrate', 'r104', 'r115', 'r105', 'r106', 'r107', 'r108', 'r201', 'r112', 'r401', 'r402',
31
+ 'r403', 'r404', 'r405', 'r408', 'r409', 'r410', 'r502', 'r501', 'r205', 'r505', 'r517', 'r512', 'r509',
32
+ 'r608', 'r616', 'r610', 'r607', 'r613', 'r612', 'r609', 'r614', 'r611', 'r307', 'r304', 'r305', 'r306',
33
+ 'r316', 'r834'
34
+ } # yapf: enabled
51
35
 
52
36
  def get(
53
- self,
54
- ticker,
55
- fetch_mode: FetchMode = FetchMode.QOQ,
56
- start_date: str = None,
57
- end_date: str = None,
58
- report_type: str = "Q",
59
- indexes: list = []
60
- ):
37
+ self,
38
+ ticker,
39
+ fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
40
+ start_date: str = None,
41
+ end_date: str = None,
42
+ report_type: str = "Q",
43
+ indexes: list = []):
61
44
  """
62
45
  基礎的query function
63
46
  ticker(str): 股票代碼
@@ -72,22 +55,14 @@ class FinanceReportFetcher(BaseTEJFetcher):
72
55
  # 確認indexes中是否有錯誤的index,有的話回傳warning
73
56
  if (indexes and self.check_index):
74
57
  indexes = set(indexes)
75
- difference = indexes-self.check_index
58
+ difference = indexes - self.check_index
76
59
  if (difference):
77
- warnings.warn(
78
- f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確",
79
- UserWarning
80
- )
81
-
60
+ warnings.warn(f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確", UserWarning)
82
61
 
83
- if (fetch_mode in {
84
- self.FetchMode.QOQ,
85
- self.FetchMode.QOQ_NOCAL
86
- }
87
- ):
62
+ if (fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}):
88
63
  if (not start_date):
89
64
  warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
90
- start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
65
+ start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
91
66
  if (not end_date):
92
67
  warnings.warn("No end_date specified, use default date = today", UserWarning)
93
68
  end_date = datetime.today()
@@ -114,17 +89,13 @@ class FinanceReportFetcher(BaseTEJFetcher):
114
89
  end_season=end_season,
115
90
  report_type=report_type,
116
91
  indexes=indexes,
117
- use_cal=use_cal
118
- )
92
+ use_cal=use_cal)
119
93
 
120
94
  return data_df
121
95
 
122
- elif (fetch_mode in {
123
- self.FetchMode.YOY,
124
- self.FetchMode.YOY_NOCAL
125
- }
126
- ):
127
- start_year = 2005
96
+ elif (fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}):
97
+ start_date = datetime.strptime(start_date, "%Y-%m-%d")
98
+ start_year = start_date.year
128
99
  end_date = self.get_latest_data_time(ticker)
129
100
  if (not end_date):
130
101
  end_date = datetime.today()
@@ -138,98 +109,140 @@ class FinanceReportFetcher(BaseTEJFetcher):
138
109
  use_cal = False
139
110
 
140
111
  data_df = self.get_YoY_data(
141
- ticker = ticker,
142
- start_year = start_year,
143
- end_year = end_year,
144
- season = season,
145
- report_type = report_type,
146
- indexes = indexes
147
- )
148
-
112
+ ticker=ticker,
113
+ start_year=start_year,
114
+ end_year=end_year,
115
+ season=season,
116
+ report_type=report_type,
117
+ indexes=indexes,
118
+ use_cal=use_cal)
119
+
149
120
  return data_df
150
121
 
151
122
  def get_QoQ_data(
152
- self,
153
- ticker,
154
- start_year,
155
- start_season,
156
- end_year,
157
- end_season,
158
- report_type = "Q",
159
- indexes = [],
160
- use_cal = False
161
- ):
123
+ self, ticker, start_year, start_season, end_year, end_season, report_type="Q", indexes=[], use_cal=False):
162
124
  """
163
125
  取得時間範圍內每季資料
164
126
  """
165
- if (not indexes): # 沒有指定 -> 取全部
127
+ if (use_cal):
128
+ if (start_season == 1):
129
+ lower_bound_year = start_year - 1
130
+ lower_bound_season = 4
131
+
132
+ else:
133
+ lower_bound_year = start_year
134
+ lower_bound_season = start_season - 1
135
+
136
+ else:
137
+ lower_bound_year = start_year,
138
+ lower_bound_season = start_season
139
+
140
+ if (not indexes): # 沒有指定 -> 取全部
166
141
  pipeline = [
167
- { "$match": { "ticker": ticker } },
168
- { "$unwind": "$data" },
169
- { "$match": {
170
- "$or": [
171
- { "data.year": { "$gt": start_year, "$lt": end_year } },
172
- { "data.year": start_year, "data.season": { "$gte": start_season } },
173
- { "data.year": end_year, "data.season": { "$lte": end_season } }
174
- ]
175
- }},
176
- { "$project": {
177
- "data.year": 1,
178
- "data.season": 1,
179
- f"data.{report_type}": 1,
180
- "_id": 0
181
- }
142
+ {
143
+ "$match": {
144
+ "ticker": ticker
145
+ }
146
+ }, {
147
+ "$unwind": "$data"
148
+ }, {
149
+ "$match":
150
+ {
151
+ "$or":
152
+ [
153
+ {
154
+ "data.year": {
155
+ "$gt": start_year,
156
+ "$lt": end_year
157
+ }
158
+ }, {
159
+ "data.year": start_year,
160
+ "data.season": {
161
+ "$gte": start_season
162
+ }
163
+ }, {
164
+ "data.year": end_year,
165
+ "data.season": {
166
+ "$lte": end_season
167
+ }
168
+ }, {
169
+ "data.year": lower_bound_year,
170
+ "data.season": lower_bound_season
171
+ }
172
+ ]
173
+ }
174
+ }, {
175
+ "$project": {
176
+ "data.year": 1,
177
+ "data.season": 1,
178
+ f"data.{report_type}": 1,
179
+ "_id": 0
180
+ }
182
181
  }
183
182
  ]
184
183
 
185
-
186
- else: # 取指定index
187
- project_stage = {
188
- "data.year": 1,
189
- "data.season": 1
190
- }
184
+ else: # 取指定index
185
+ project_stage = {"data.year": 1, "data.season": 1}
191
186
  for index in indexes:
192
187
  project_stage[f"data.{report_type}.{index}"] = 1
193
188
 
194
189
  pipeline = [
195
- { "$match": { "ticker": ticker } },
196
- { "$unwind": "$data" },
197
- { "$match": {
198
- "$or": [
199
- { "data.year": { "$gt": start_year, "$lt": end_year } },
200
- { "data.year": start_year, "data.season": { "$gte": start_season } },
201
- { "data.year": end_year, "data.season": { "$lte": end_season } }
202
- ]
203
- }},
204
- { "$project": project_stage }
190
+ {
191
+ "$match": {
192
+ "ticker": ticker
193
+ }
194
+ }, {
195
+ "$unwind": "$data"
196
+ }, {
197
+ "$match":
198
+ {
199
+ "$or":
200
+ [
201
+ {
202
+ "data.year": {
203
+ "$gt": start_year,
204
+ "$lt": end_year
205
+ }
206
+ }, {
207
+ "data.year": start_year,
208
+ "data.season": {
209
+ "$gte": start_season
210
+ }
211
+ }, {
212
+ "data.year": end_year,
213
+ "data.season": {
214
+ "$lte": end_season
215
+ }
216
+ }, {
217
+ "data.year": lower_bound_year,
218
+ "data.season": lower_bound_season
219
+ }
220
+ ]
221
+ }
222
+ }, {
223
+ "$project": project_stage
224
+ }
205
225
  ]
206
226
 
207
-
208
227
  fetched_data = self.collection.aggregate(pipeline).to_list()
209
-
228
+
210
229
  data_dict = StatsProcessor.list_of_dict_to_dict(
211
- fetched_data,
212
- keys = ["year", "season"],
213
- delimeter = "Q",
214
- data_key=report_type
215
- )
230
+ fetched_data, keys=["year", "season"], delimeter="Q", data_key=report_type)
231
+
216
232
  if (use_cal):
217
233
  data_with_QoQ = self.cal_QoQ(data_dict)
218
234
  data_df = pd.DataFrame.from_dict(data_with_QoQ)
235
+ data_df = data_df.iloc[:, 1:]
236
+ data_df = data_df.iloc[:, ::-1].T
237
+ data_dict = data_df.to_dict()
238
+ data_dict = self.get_dict_of_df(data_dict)
239
+ return data_dict
219
240
  else:
220
241
  data_df = pd.DataFrame.from_dict(data_dict)
221
- return data_df
222
-
223
- def get_YoY_data(
224
- self,
225
- ticker,
226
- start_year,
227
- end_year,
228
- season,
229
- report_type = "Q",
230
- indexes = [],
231
- use_cal = False
232
- ):
242
+ data_df = data_df.iloc[:, ::-1]
243
+ return data_df
244
+
245
+ def get_YoY_data(self, ticker, start_year, end_year, season, report_type="Q", indexes=[], use_cal=False):
233
246
  """
234
247
  取得某季歷年資料
235
248
  """
@@ -237,77 +250,90 @@ class FinanceReportFetcher(BaseTEJFetcher):
237
250
  select_year = set()
238
251
 
239
252
  for year in range(start_year, end_year + 1):
240
- year_shifts = {
241
- year,
242
- year - 1,
243
- year - 3,
244
- year - 5,
245
- year - 10
246
- }
253
+ year_shifts = {year, year - 1, year - 3, year - 5, year - 10}
247
254
 
248
255
  select_year = select_year.union(year_shifts)
249
-
256
+
250
257
  select_year = sorted(list(select_year), reverse=True)
251
258
  else:
252
259
  select_year = [year for year in range(start_year, end_year + 1)]
253
260
 
254
- if (not indexes): # 沒有指定 -> 取全部
261
+ if (not indexes): # 沒有指定 -> 取全部
255
262
  pipeline = [
256
- { "$match": { "ticker": ticker } },
257
- { "$unwind": "$data" },
258
- { "$match": {
259
- "$or":[
263
+ {
264
+ "$match": {
265
+ "ticker": ticker
266
+ }
267
+ }, {
268
+ "$unwind": "$data"
269
+ }, {
270
+ "$match":
260
271
  {
261
- "$and": [
262
- { "data.year": {"$in": select_year }},
263
- { "data.season": {"$eq": season}}
264
- ]
265
- },
266
- ]
267
- }},
268
- { "$project": {
269
- "data.year": 1,
270
- "data.season": 1,
271
- f"data.{report_type}": 1,
272
- "_id": 0
273
- }
272
+ "$or": [{
273
+ "$and": [{
274
+ "data.year": {
275
+ "$in": select_year
276
+ }
277
+ }, {
278
+ "data.season": {
279
+ "$eq": season
280
+ }
281
+ }]
282
+ },]
283
+ }
284
+ }, {
285
+ "$project": {
286
+ "data.year": 1,
287
+ "data.season": 1,
288
+ f"data.{report_type}": 1,
289
+ "_id": 0
290
+ }
274
291
  }
275
292
  ]
276
293
 
277
-
278
- else: # 取指定index
279
- project_stage = {
280
- "data.year": 1,
281
- "data.season": 1
282
- }
294
+ else: # 取指定index
295
+ project_stage = {"data.year": 1, "data.season": 1}
283
296
  for index in indexes:
284
297
  project_stage[f"data.{report_type}.{index}"] = 1
285
298
 
286
299
  pipeline = [
287
- { "$match": { "ticker": ticker } },
288
- { "$unwind": "$data" },
289
- { "$match": {
290
- "$and": [
291
- { "data.year": {"$in": select_year}},
292
- { "data.season": {"$eq": season}}
293
- ]
294
- }},
295
- { "$project": project_stage }
300
+ {
301
+ "$match": {
302
+ "ticker": ticker
303
+ }
304
+ }, {
305
+ "$unwind": "$data"
306
+ }, {
307
+ "$match": {
308
+ "$and": [{
309
+ "data.year": {
310
+ "$in": select_year
311
+ }
312
+ }, {
313
+ "data.season": {
314
+ "$eq": season
315
+ }
316
+ }]
317
+ }
318
+ }, {
319
+ "$project": project_stage
320
+ }
296
321
  ]
297
322
 
298
323
  fetched_data = self.collection.aggregate(pipeline).to_list()
299
324
 
300
325
  # 處理計算YoY
301
326
  data_dict = StatsProcessor.list_of_dict_to_dict(
302
- fetched_data,
303
- keys = ['year', 'season'],
304
- data_key=report_type,
305
- delimeter='Q'
306
- )
327
+ fetched_data, keys=['year', 'season'], data_key=report_type, delimeter='Q')
328
+
307
329
  if (use_cal):
308
- data_with_YoY = self.cal_YoY(data_dict, start_year, end_year)
309
- result_df = pd.DataFrame.from_dict(data_with_YoY)
330
+ data_with_YoY = self.cal_YoY(data_dict, start_year, end_year, season)
331
+ data_df = pd.DataFrame.from_dict(data_with_YoY)
332
+ data_df = data_df.iloc[:, ::-1].T
333
+ data_dict = data_df.to_dict()
334
+ data_dict = self.get_dict_of_df(data_dict)
335
+ return data_dict
310
336
  else:
311
- result_df = pd.DataFrame.from_dict(data_dict)
312
-
313
- return result_df
337
+ data_df = pd.DataFrame.from_dict(data_dict)
338
+ data_df = data_df.iloc[:, ::-1]
339
+ return data_df
@@ -12,51 +12,68 @@ class ValueFetcher(StatsFetcher):
12
12
  def prepare_query(self, start_date, end_date):
13
13
  pipeline = super().prepare_query()
14
14
 
15
- pipeline.append({
16
- "$project": {
17
- "_id": 0,
18
- "ticker": 1,
19
- "company_name": 1,
20
- "daily_data": {
21
- "$map": {
22
- "input": {
23
- "$filter": {
24
- "input": "$daily_data",
25
- "as": "daily",
26
- "cond": {
27
- "$and": [{
28
- "$gte": ["$$daily.date", start_date]
29
- }, {
30
- "$lte": ["$$daily.date", end_date]
31
- }]
32
- }
33
- }
34
- },
35
- "as": "daily_item",
36
- "in": {
37
- "date": "$$daily_item.date",
38
- "close": "$$daily_item.close",
39
- "P_B": "$$daily_item.P_B",
40
- "P_E": "$$daily_item.P_E",
41
- "P_FCF": "$$daily_item.P_FCF",
42
- "P_S": "$$daily_item.P_S",
43
- "EV_OPI": "$$daily_item.EV_OPI",
44
- "EV_EBIT": "$$daily_item.EV_EBIT",
45
- "EV_EBITDA": "$$daily_item.EV_EBITDA",
46
- "EV_S": "$$daily_item.EV_S"
47
- }
15
+ pipeline.append(
16
+ {
17
+ "$project":
18
+ {
19
+ "_id": 0,
20
+ "ticker": 1,
21
+ "company_name": 1,
22
+ "daily_data":
23
+ {
24
+ "$map":
25
+ {
26
+ "input":
27
+ {
28
+ "$filter":
29
+ {
30
+ "input": "$daily_data",
31
+ "as": "daily",
32
+ "cond":
33
+ {
34
+ "$and":
35
+ [
36
+ {
37
+ "$gte": ["$$daily.date", start_date]
38
+ }, {
39
+ "$lte": ["$$daily.date", end_date]
40
+ }
41
+ ]
42
+ }
43
+ }
44
+ },
45
+ "as": "daily_item",
46
+ "in":
47
+ {
48
+ "date": "$$daily_item.date",
49
+ "close": "$$daily_item.close",
50
+ "P_B": "$$daily_item.P_B",
51
+ "P_E": "$$daily_item.P_E",
52
+ "P_FCF": "$$daily_item.P_FCF",
53
+ "P_S": "$$daily_item.P_S",
54
+ "EV_OPI": "$$daily_item.EV_OPI",
55
+ "EV_EBIT": "$$daily_item.EV_EBIT",
56
+ "EV_EBITDA": "$$daily_item.EV_EBITDA",
57
+ "EV_S": "$$daily_item.EV_S"
58
+ }
59
+ }
60
+ },
61
+ "yearly_data": 1
48
62
  }
49
- },
50
- "yearly_data": 1
51
- }
52
- })
63
+ })
53
64
 
54
65
  return pipeline
55
66
 
67
+ def collect_data(self, start_date, end_date):
68
+ pipeline = self.prepare_query(start_date, end_date)
69
+
70
+ fetched_data = list(self.collection.aggregate(pipeline))
71
+
72
+ return fetched_data[0]
73
+
56
74
  def query_data(self):
57
75
  try:
58
- latest_time = StatsDateTime.get_latest_time(
59
- self.ticker, self.collection)['last_update_time']
76
+ latest_time = StatsDateTime.get_latest_time(self.ticker, self.collection)['last_update_time']
60
77
  target_year = latest_time['daily_data']['last_update'].year
61
78
  start_date = latest_time['daily_data']['last_update'] - timedelta(days=31)
62
79
  end_date = latest_time['daily_data']['last_update']
@@ -79,7 +96,7 @@ class ValueFetcher(StatsFetcher):
79
96
  )
80
97
 
81
98
  return fetched_data
82
-
99
+
83
100
  def query_value_serie(self):
84
101
  """
85
102
  回傳指定公司的歷來評價
@@ -104,28 +121,32 @@ class ValueFetcher(StatsFetcher):
104
121
  }
105
122
  },
106
123
  {
107
- "$project": {
108
- "_id": 0,
109
- "ticker": 1,
110
- "company_name": 1,
111
- "daily_data": {
112
- "$map": {
113
- "input": "$daily_data", # 正確地指定要處理的陣列
114
- "as": "daily", # 每個元素的名稱
115
- "in": {
116
- "date": "$$daily.date",
117
- "P_E": "$$daily.P_E",
118
- "P_FCF": "$$daily.P_FCF",
119
- "P_B": "$$daily.P_B",
120
- "P_S": "$$daily.P_S",
121
- "EV_OPI": "$$daily.EV_OPI",
122
- "EV_EBIT": "$$daily.EV_EBIT",
123
- "EV_EBITDA": "$$daily.EV_EBITDA",
124
- "EV_S": "$$daily.EV_S"
124
+ "$project":
125
+ {
126
+ "_id": 0,
127
+ "ticker": 1,
128
+ "company_name": 1,
129
+ "daily_data":
130
+ {
131
+ "$map":
132
+ {
133
+ "input": "$daily_data", # 正確地指定要處理的陣列
134
+ "as": "daily", # 每個元素的名稱
135
+ "in":
136
+ {
137
+ "date": "$$daily.date",
138
+ "P_E": "$$daily.P_E",
139
+ "P_FCF": "$$daily.P_FCF",
140
+ "P_B": "$$daily.P_B",
141
+ "P_S": "$$daily.P_S",
142
+ "EV_OPI": "$$daily.EV_OPI",
143
+ "EV_EBIT": "$$daily.EV_EBIT",
144
+ "EV_EBITDA": "$$daily.EV_EBITDA",
145
+ "EV_S": "$$daily.EV_S"
146
+ }
147
+ }
125
148
  }
126
- }
127
149
  }
128
- }
129
150
  }
130
151
  ]
131
152
 
@@ -133,21 +154,17 @@ class ValueFetcher(StatsFetcher):
133
154
  fetched_data = fetched_data[0]
134
155
 
135
156
  value_keys = ["P_E", "P_FCF", "P_B", "P_S", "EV_OPI", "EV_EBIT", "EV_EVITDA", "EV_S"]
136
- return_dict = {
137
- value_key: dict() for value_key in value_keys
138
- }
157
+ return_dict = {value_key: dict() for value_key in value_keys}
139
158
 
140
159
  for value_key in value_keys:
141
160
  for data in fetched_data['daily_data']:
142
161
  if (value_key not in data.keys()):
143
162
  continue
144
163
  else:
145
- return_dict[value_key].update({
146
- data['date']: data[value_key]
147
- })
164
+ return_dict[value_key].update({data['date']: data[value_key]})
148
165
 
149
166
  return_dict = {
150
- value_key: pd.DataFrame.from_dict(value_dict, orient = 'index', columns = [value_key])
167
+ value_key: pd.DataFrame.from_dict(value_dict, orient='index', columns=[value_key])
151
168
  for value_key, value_dict in return_dict.items()
152
169
  }
153
170
  return return_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: neurostats-API
3
- Version: 0.0.15
2
+ Name: neurostats_API
3
+ Version: 0.0.17
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
@@ -89,7 +89,7 @@ pip install neurostats-API
89
89
  ```Python
90
90
  >>> import neurostats_API
91
91
  >>> print(neurostats_API.__version__)
92
- 0.0.15
92
+ 0.0.16
93
93
  ```
94
94
 
95
95
  ### 得到最新一期的評價資料與歷年評價
@@ -691,21 +691,25 @@ fetcher = FinanceReportFetcher(
691
691
 
692
692
  data = fetcher.get(
693
693
  ticker = "2330", # 任意的股票代碼
694
- fetch_mode = fetcher.YOY_NOCAL # 取得模式
694
+ fetch_mode = fetcher.FetchMode.QOQ_NOCAL, # 取得模式
695
695
  start_date = "2005-01-01",
696
696
  end_date = "2024-12-31",
697
697
  report_type = "Q",
698
698
  indexes = []
699
- )
699
+ ) # -> pd.DataFrame or Dict[pd.DataFrame]
700
700
  ```
701
701
  - `ticker`: 股票代碼
702
702
 
703
703
  - `fetch_mode` : 取得模式,為`fetcher.FetchMode.YOY_NOCAL`、`fetcher.FetchMode.QOQ_NOCAL`、`fetcher.FetchMode.YOY` 或 `fetcher.FetchMode.QOQ`
704
- - `YOY_NOCAL`: 以end_date為準,取得與end_date為準同季的歷年資料,時間範圍以start_date為準
704
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date同季的歷年資料,時間範圍以start_date為起始
705
705
  > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
706
706
 
707
707
  - `QOQ_NOCAL`: 時間範圍內的每季資料
708
708
 
709
+ - `QOQ`: 時間範圍內每季的每個index的數值以及QoQ
710
+
711
+ - `YOY`: 以end_date為準,取得與end_date同季的歷年資料以及成長率,時間範圍以start_date為起始
712
+
709
713
  - `start_date`: 開始日期,不設定時預設為`2005-01-01`
710
714
 
711
715
  - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
@@ -722,10 +726,61 @@ data = fetcher.get(
722
726
  請看 `會計師簽證財務資料`
723
727
 
724
728
  #### 回傳資料
725
- fetch_mode設定為`YOY_NOCAL`與`QOQ_NOCAL`下
726
- 為回傳pd.DataFramecolumn名稱為<年份>Q<季>, row名稱為指定財報項目
729
+ ##### `YOY_NOCAL` 與 `QOQ_NOCAL`
730
+ 為回傳`pd.DataFrame`,column名稱為<年份>Q<季>, row名稱為指定財報項目
731
+ ```Python
732
+ # fetch_mode = fetcher.FetchMode.QOQ_NOCAL
733
+ 2024Q3 2024Q2 2024Q1
734
+ bp41 7.082005e+07 6.394707e+07 5.761001e+07
735
+ bp51 3.111298e+09 3.145373e+09 3.091985e+09
736
+
737
+ # fetch_mode = fetcher.FetchMode.YOY_NOCAL
738
+ 2024Q3 2023Q3 2022Q3
739
+ bp41 7.082005e+07 5.377231e+07 6.201822e+07
740
+ bp51 3.111298e+09 3.173919e+09 2.453840e+09
741
+ ```
742
+
743
+ ##### `YOY` 與 `QOQ`
744
+ 回傳為`Dict[pd.DataFrame]`, key 為指定的index, DataFrame中則是該index歷年的數值與成長率
745
+ ```Python
746
+ # fetch_mode = fetcher.FetchMode.QOQ
747
+ {
748
+ 'bp41':
749
+ 2024Q3 2024Q2 2024Q1
750
+ value 7.082005e+07 6.394707e+07 5.761001e+07
751
+ growth 1.074791e-01 1.099994e-01 5.532101e-03,
752
+ 'bp51':
753
+ 2024Q3 2024Q2 2024Q1
754
+ value 3.111298e+09 3.145373e+09 3.091985e+09
755
+ growth -1.083335e-02 1.726663e-02 -4.159542e-03
756
+ }
757
+
758
+ # fetch_mode = fetcher.FetchMode.YOY
759
+ {
760
+ 'bp41':
761
+ 2024Q3 2023Q3 2022Q3
762
+ value 7.082005e+07 5.377231e+07 6.201822e+07
763
+ YoY_1 NaN NaN 4.130744e-01
764
+ YoY_3 1.729171e-01 9.556684e-02 1.883274e-01
765
+ YoY_5 1.389090e-01 1.215242e-01 1.642914e-01
766
+ YoY_10 1.255138e-01 1.356297e-01 1.559702e-01,
767
+ 'bp51':
768
+ 2024Q3 2023Q3 2022Q3
769
+ value 3.111298e+09 3.173919e+09 2.453840e+09
770
+ YoY_1 NaN NaN 3.179539e-01
771
+ YoY_3 1.866752e-01 2.766851e-01 2.638677e-01
772
+ YoY_5 2.068132e-01 2.479698e-01 1.815106e-01
773
+ YoY_10 1.420500e-01 1.586797e-01 1.551364e-01
774
+ }
775
+ ```
776
+
727
777
 
728
778
  ## 版本紀錄
779
+ ## 0.0.16
780
+ - 處理ValueFetcher的error #issue76
781
+
782
+ - tej_fetcher新增 QOQ, YOY功能
783
+
729
784
  ## 0.0.15
730
785
  - TechFetcher中新增指數條件
731
786
 
@@ -1,18 +1,18 @@
1
- neurostats_API/__init__.py,sha256=oEkbIWbrC6-8sBPEJQXg0QYoz3TNZtYXhSTEO6d0JcU,261
1
+ neurostats_API/__init__.py,sha256=5ToELVqNOIdVJrMj5G8JvbyRIjvo1FxcP6e-a-iMe1Y,261
2
2
  neurostats_API/cli.py,sha256=UJSWLIw03P24p-gkBb6JSEI5dW5U12UvLf1L8HjQD-o,873
3
3
  neurostats_API/main.py,sha256=QcsfmWivg2Dnqw3MTJWiI0QvEiRs0VuH-BjwQHFCv00,677
4
4
  neurostats_API/fetchers/__init__.py,sha256=B4aBwVzf_X-YieEf3fZteU0qmBPVIB9VjrmkyWhLK18,489
5
5
  neurostats_API/fetchers/balance_sheet.py,sha256=sQv4Gk5uoKURLEdh57YknOQWiyVwaXJ2Mw75jxNqUS0,5804
6
- neurostats_API/fetchers/base.py,sha256=4YS8MJR3u9Sg6dKX7QoCYuqNeQaoYHIlvPm5x8VQ72U,4882
6
+ neurostats_API/fetchers/base.py,sha256=Rl88Mhvi0uFpPupUvy0iyS7IA4B3fnn6ovMNzS7EU34,5594
7
7
  neurostats_API/fetchers/cash_flow.py,sha256=TY7VAWVXkj5-mzH5Iu0sIE-oV8MvGmmDy0URNotNV1E,7614
8
8
  neurostats_API/fetchers/finance_overview.py,sha256=PxUdWY0x030olYMLcCHDBn068JLmCE2RTOce1dxs5vM,27753
9
9
  neurostats_API/fetchers/institution.py,sha256=UrcBc6t7u7CnEwUsf6YmLbbJ8VncdWpq8bCz17q2dgs,11168
10
10
  neurostats_API/fetchers/margin_trading.py,sha256=lQImtNdvaBoSlKhJvQ3DkH3HjSSgKRJz4ZZpyR5-Z4I,10433
11
11
  neurostats_API/fetchers/month_revenue.py,sha256=nixX2llzjCFr2m2YVjxrSfkBusnZPrPb2dRDq1XLGhw,4251
12
12
  neurostats_API/fetchers/profit_lose.py,sha256=EN9Y0iamcAaHMZdjHXO6b_2buLnORssf8ZS7A0hi74s,5896
13
- neurostats_API/fetchers/tech.py,sha256=Hol1bcwJ_ERcnoTXNWlqqaWOuzdl7MeiAjCvzQMZDTg,12269
14
- neurostats_API/fetchers/tej_finance_report.py,sha256=VDP0Lx2ErCgIBBz7nbquC1ugkcnj6p7ehM2JtFInjsQ,10218
15
- neurostats_API/fetchers/value_invest.py,sha256=_eQxuEnIYvksb06QHixGK29Gnwr_3xmI6Tu7dv4J__E,5769
13
+ neurostats_API/fetchers/tech.py,sha256=8U6kn7cvWJsmKIMn_f2l6U9H_NBy_OwOXlS26XhFIv0,12926
14
+ neurostats_API/fetchers/tej_finance_report.py,sha256=laXph2ca1LCFocZjjdvtzmm5fcUecHk2Gs5h6-XMSWY,12967
15
+ neurostats_API/fetchers/value_invest.py,sha256=b_x2Dpgs8VBU5HdG8ocKtfIEkqhU-Q0S5n6RxuFuM2g,7467
16
16
  neurostats_API/tools/balance_sheet.yaml,sha256=6XygNG_Ybb1Xkk1e39LMLKr7ATvaCP3xxuwFbgNl6dA,673
17
17
  neurostats_API/tools/cash_flow_percentage.yaml,sha256=fk2Z4eb1JjGFvP134eJatHacB7BgTkBenhDJr83w8RE,1345
18
18
  neurostats_API/tools/finance_overview_dict.yaml,sha256=B9nV75StXkrF3yv2-eezzitlJ38eEK86RD_VY6588gQ,2884
@@ -24,7 +24,7 @@ neurostats_API/utils/data_process.py,sha256=A--dzOsu42jRxqqCD41gTtjE5rhEBYmhB6y-
24
24
  neurostats_API/utils/datetime.py,sha256=XJya4G8b_-ZOaBbMXgQjWh2MC4wc-o6goQ7EQJQMWrQ,773
25
25
  neurostats_API/utils/db_client.py,sha256=OYe6yazcR4Aa6jYmy47JrryUeh2NnKGqY2K_lSZe6i8,455
26
26
  neurostats_API/utils/fetcher.py,sha256=VbrUhjA-GG5AyjPX2SHtFIbZM4dm3jo0RgZzuCbb_Io,40927
27
- neurostats_API-0.0.15.dist-info/METADATA,sha256=btfdGRam5QpUHFFiA_UPWYeZuAqAMYkEJ0Ufod399T4,27959
28
- neurostats_API-0.0.15.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
29
- neurostats_API-0.0.15.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
30
- neurostats_API-0.0.15.dist-info/RECORD,,
27
+ neurostats_API-0.0.17.dist-info/METADATA,sha256=_MqEN2Yi-tDE8i4UzX9WGUi25Z7SzyNgDR2kj0p2vhw,29848
28
+ neurostats_API-0.0.17.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
29
+ neurostats_API-0.0.17.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
30
+ neurostats_API-0.0.17.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5