neurostats-API 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ class InstitutionFetcher(StatsFetcher):
19
19
 
20
20
  def __init__(self, ticker, db_client):
21
21
  super().__init__(ticker, db_client)
22
+ self.tej_collection = self.db['TWN/APISHRACT'] # TEJ備援
22
23
 
23
24
  def prepare_query(self, start_date, end_date):
24
25
  pipeline = super().prepare_query()
@@ -107,9 +108,6 @@ class InstitutionFetcher(StatsFetcher):
107
108
  hour=0, minute=0, second=0, microsecond=0
108
109
  )
109
110
  except Exception as e:
110
- print(
111
- f"No updated time for institution_trading in {self.ticker}, use current time instead"
112
- )
113
111
  end_date = datetime.now(self.timezone)
114
112
  end_date = end_date.replace(
115
113
  hour=0, minute=0, second=0, microsecond=0
@@ -123,14 +121,22 @@ class InstitutionFetcher(StatsFetcher):
123
121
  fetched_data = self.collect_data(start_date, end_date)
124
122
 
125
123
  fetched_data['daily_data'] = sorted(
126
- fetched_data['daily_data'], key=lambda x: x['date'], reverse=True
124
+ fetched_data.get("daily_data", []), key=lambda x: x['date'], reverse=True
127
125
  )
128
126
 
127
+ if (not fetched_data['institution_trading']):
128
+ # 找 TEJ 備援
129
+ fetched_data.update(
130
+ {
131
+ 'institution_trading': self.collect_tej(start_date, end_date)
132
+ }
133
+ )
134
+
129
135
  fetched_data['institution_trading'] = sorted(
130
- fetched_data['institution_trading'],
136
+ fetched_data.get('institution_trading', []),
131
137
  key=lambda x: x['date'],
132
138
  reverse=True
133
- ) if (fetched_data['institution_trading']) else []
139
+ ) if fetched_data['institution_trading'] else None
134
140
 
135
141
  table_dict = self.process_data(fetched_data)
136
142
 
@@ -227,7 +233,6 @@ class InstitutionFetcher(StatsFetcher):
227
233
  table_dict['annual_trading'] = self.process_annual_trading(
228
234
  annual_dates, annual_trading_skip
229
235
  )
230
-
231
236
 
232
237
  return table_dict
233
238
 
@@ -343,3 +348,71 @@ class InstitutionFetcher(StatsFetcher):
343
348
  "percentage": 0
344
349
  },
345
350
  }
351
+
352
def collect_tej(self, start_date, end_date):
    """
    Fetch institutional-trading rows from the TEJ fallback collection.

    Runs the pipeline built by ``prepare_tej_query`` against
    ``self.tej_collection``, keeps the TEJ columns listed in
    ``proj_columns``, converts the share counts from thousands of shares
    to shares, and renames the columns to the labels in ``proj_columns``.

    Args:
        start_date: Lower bound (inclusive) forwarded to ``prepare_tej_query``.
        end_date: Upper bound (inclusive) forwarded to ``prepare_tej_query``.

    Returns:
        A list of dicts (one per row). An empty list when the aggregation
        returns no document or the document holds no rows in range.
    """
    pipeline = self.prepare_tej_query(start_date, end_date)
    result = self.tej_collection.aggregate(pipeline).to_list()

    # No matching document, or no rows inside the window: nothing to
    # convert. Returning [] keeps the caller's "is it empty?" check simple.
    if not result:
        return []
    rows = result[0].get('filtered_data') or []
    if not rows:
        return []

    # TEJ field -> output label. The dealer (self-traded) labels follow the
    # same buy / sell / net order as every other group.
    # NOTE(review): the "dlrh_ex" label lacks the "商" character that the
    # other dealer labels carry - confirm against the consumers of this key.
    proj_columns = {
        "mdate": "date",
        "qfii_buy": "外資買進股數",
        "qfii_sell": "外資賣出股數",
        "qfii_ex": "外資買賣超股數",
        "fund_buy": "投信買進股數",
        "fund_sell": "投信賣出股數",
        "fund_ex": "投信買賣超股數",
        "dlrp_buy": "自營商買進股數(自行)",
        "dlrp_sell": "自營商賣出股數(自行)",
        "dlrp_ex": "自營商買賣超股數(自行)",
        "dlrh_buy": "自營商買進股數(避險)",
        "dlrh_sell": "自營商賣出股數(避險)",
        "dlrh_ex": "自營買賣超股數(避險)",
        "tot_ex": "三大法人買賣超股數",
    }
    source_columns = list(proj_columns)
    value_columns = source_columns[1:]  # everything except the date

    # reindex (rather than .loc) so a column missing from TEJ becomes NaN
    # instead of raising KeyError.
    result_df = pd.DataFrame(rows).reindex(columns=source_columns)

    # TEJ reports volumes in thousands of shares.
    result_df[value_columns] = result_df[value_columns] * 1000

    return result_df.rename(columns=proj_columns).to_dict(orient='records')
385
+
386
def prepare_tej_query(self, start_date, end_date):
    """
    Build the aggregation pipeline for the TEJ fallback collection.

    The pipeline matches the document whose ``ticker`` equals
    ``self.ticker`` and projects ``filtered_data``: the elements of the
    document's ``data`` array whose ``mdate`` lies between ``start_date``
    and ``end_date``, both ends inclusive.

    Args:
        start_date: Lower bound compared against each element's ``mdate``.
        end_date: Upper bound compared against each element's ``mdate``.

    Returns:
        A two-stage pipeline as a list: ``$match`` then ``$project``.
    """
    within_range = {
        "$and": [
            {"$gte": ["$$item.mdate", start_date]},
            {"$lte": ["$$item.mdate", end_date]},
        ]
    }
    rows_in_range = {
        "$filter": {
            "input": "$data",
            "as": "item",
            "cond": within_range,
        }
    }

    match_stage = {"$match": {"ticker": {"$eq": self.ticker}}}
    project_stage = {
        "$project": {
            "_id": 0,
            "ticker": 1,
            "filtered_data": {
                # Each filtered element is passed through unchanged.
                "$map": {
                    "input": rows_in_range,
                    "as": "filtered_item",
                    "in": "$$filtered_item",
                }
            },
        }
    }

    return [match_stage, project_stage]
@@ -0,0 +1,8 @@
1
+ from .base import StatsFetcher
2
+ from datetime import datetime, timedelta, date
3
+ import json
4
+ import numpy as np
5
+ import pandas as pd
6
+ from ..utils import StatsDateTime, StatsProcessor
7
+ import importlib.resources as pkg_resources
8
+ import yaml
@@ -121,9 +121,6 @@ class MarginTradingFetcher(StatsFetcher):
121
121
  hour=0, minute=0, second=0, microsecond=0
122
122
  )
123
123
  except Exception as e:
124
- print(
125
- f"No updated time for institution_trading in {self.ticker}, use current time instead"
126
- )
127
124
  end_date = datetime.now(self.timezone)
128
125
  end_date = end_date.replace(
129
126
  hour=0, minute=0, second=0, microsecond=0
@@ -4,6 +4,8 @@ import pandas as pd
4
4
  from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
5
5
  import importlib.resources as pkg_resources
6
6
  import yaml
7
+ import traceback
8
+ import logging
7
9
 
8
10
 
9
11
  class MonthRevenueFetcher(StatsFetcher):
@@ -11,151 +13,183 @@ class MonthRevenueFetcher(StatsFetcher):
11
13
  iFa.ai: 財務分析 -> 每月營收
12
14
  """
13
15
 
14
- def __init__(self, ticker, db_client):
16
+ def __init__(self, ticker, db_client, logger = None):
15
17
  super().__init__(ticker, db_client)
18
+ self.logger = logger or logging.getLogger(__name__)
16
19
 
17
- def prepare_query(self, target_year, target_month):
20
+ def _prepare_query(self, target_year, target_month):
18
21
  pipeline = super().prepare_query()
19
22
 
20
- pipeline.append({
21
- "$project": {
22
- "_id": 0,
23
- "ticker": 1,
24
- "company_name": 1,
25
- "monthly_data": {
26
- "$sortArray": {
27
- "input": "$monthly_data",
28
- "sortBy": {
29
- "year": -1,
30
- "month": -1
23
+ pipeline.append(
24
+ {
25
+ "$project": {
26
+ "_id": 0,
27
+ "ticker": 1,
28
+ "company_name": 1,
29
+ "monthly_data": {
30
+ "$sortArray": {
31
+ "input": "$monthly_data",
32
+ "sortBy": {
33
+ "year": -1,
34
+ "month": -1
35
+ }
31
36
  }
32
- }
33
- },
37
+ },
38
+ }
34
39
  }
35
- })
40
+ )
36
41
 
37
42
  return pipeline
38
43
 
39
44
  def collect_data(self, target_year, target_month):
40
- pipeline = self.prepare_query(target_year, target_month)
41
-
45
+ pipeline = self._prepare_query(target_year, target_month)
42
46
  fetched_data = self.collection.aggregate(pipeline)
47
+ fetched_data = fetched_data.to_list()
43
48
 
44
- fetched_data = list(fetched_data)
45
-
46
- return fetched_data[-1]
49
+ return fetched_data
47
50
 
48
51
def query_data(self):
    """
    Fetch and process the monthly revenue tables for ``self.ticker``.

    Resolves the target year/month via ``_get_target_year_and_month``,
    queries the collection, and processes the last returned document with
    ``_process_data``.

    Returns:
        The dict produced by ``_process_data``; when processing fails for
        any reason (including an empty query result), the placeholder dict
        from ``_get_empty_structure`` for the same target year/month.
    """
    target_year, target_month = self._get_target_year_and_month()

    # Query data
    fetched_data = self.collect_data(target_year, target_month)

    try:
        return self._process_data(fetched_data[-1])
    except Exception:
        # Best-effort fallback: record why processing failed, then return
        # the placeholder for the *requested* period. The target year/month
        # must not be altered here, otherwise the placeholder is built for
        # a different period than the one that was queried.
        self.logger.debug(
            f"{self.ticker}: No monthly revenue data in TWSE mongoDB",
            exc_info=True
        )
        return self._get_empty_structure(target_year, target_month)
69
+
70
+ def _process_data(self, fetched_data):
63
71
 
64
- def process_data(self, fetched_data):
72
+ monthly_data = fetched_data.get('monthly_data', [])
73
+ if not monthly_data:
74
+ raise ValueError("monthly_data is empty or missing")
65
75
 
66
- monthly_data = fetched_data['monthly_data']
67
76
  for data in monthly_data:
68
77
  for key, value in data.items():
69
- if ("YoY" in key):
78
+ if "YoY" in key:
70
79
  data[key] = StatsProcessor.cal_percentage(value)
71
- elif ("ratio" in key or 'percentage' in key):
72
- data[key] = StatsProcessor.cal_non_percentage(value,
73
- to_str=True,
74
- postfix="%")
75
- elif (key not in ('year', 'month')):
76
- data[key] = StatsProcessor.cal_non_percentage(value,
77
- postfix="千元")
78
- target_month = monthly_data[0]['month']
79
- monthly_df = pd.DataFrame(monthly_data)
80
+ elif "ratio" in key or "percentage" in key:
81
+ data[key] = StatsProcessor.cal_non_percentage(value, to_str=True, postfix="%")
82
+ elif key not in ('year', 'month'):
83
+ data[key] = StatsProcessor.cal_non_percentage(value, postfix="千元")
80
84
 
85
+ monthly_df = pd.DataFrame(monthly_data)
86
+ target_month = monthly_data[0]['month']
81
87
  target_month_df = monthly_df[monthly_df['month'] == target_month]
82
88
  annual_month_df = monthly_df[monthly_df['month'] == 12]
83
- month_revenue_df = monthly_df.pivot(index='month',
84
- columns='year',
85
- values='revenue')
86
-
87
- grand_total_df = target_month_df.pivot(index='month',
88
- columns='year',
89
- values='grand_total')
90
89
 
91
- annual_total_df = annual_month_df.pivot(index='month',
92
- columns='year',
93
- values='grand_total')
94
-
95
- grand_total_df.rename(index={target_month: f"grand_total"},
96
- inplace=True)
90
+
91
+ month_revenue_df = monthly_df.pivot(
92
+ index='month', columns='year', values='revenue'
93
+ )
97
94
  month_revenue_df = month_revenue_df.sort_index(ascending=False)
98
- month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
99
- axis=0)
100
-
101
- fetched_data['month_revenue'] = month_revenue_df[sorted(
102
- month_revenue_df.columns, reverse=True)]
103
- # 歷年月營收
104
- fetched_data[
105
- 'this_month_revenue_over_years'] = target_month_df.set_index(
106
- "year")[[
107
- "revenue", "revenue_increment_ratio", "YoY_1", "YoY_3",
108
- "YoY_5", "YoY_10"
109
- ]].T
110
- # 歷年營收成長量
111
- fetched_data['grand_total_over_years'] = target_month_df.set_index(
112
- "year")[[
113
- "grand_total", "grand_total_increment_ratio",
114
- "grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5",
115
- "grand_total_YoY_10"
116
- ]].T
117
95
 
118
- fetched_data.pop("monthly_data")
96
+ grand_total_df = target_month_df.pivot(
97
+ index='month', columns='year', values='grand_total'
98
+ )
99
+ grand_total_df.rename(
100
+ index={target_month: f"grand_total"}, inplace=True
101
+ )
102
+ month_revenue_df = pd.concat([grand_total_df, month_revenue_df], axis=0)
119
103
 
120
- fetched_data['recent_month_revenue'] = self.get_recent_revenue_grwoth(
121
- monthly_data, grand_total_dict=annual_total_df.to_dict(), interval = 12
104
+ annual_total_df = annual_month_df.pivot(
105
+ index='month', columns='year', values='grand_total'
106
+ )
107
+
108
+ fetched_data.update(
109
+ {
110
+ "month_revenue": month_revenue_df[sorted(month_revenue_df.columns, reverse=True)],
111
+ "this_month_revenue_over_years": target_month_df.set_index("year")[[
112
+ "revenue", "revenue_increment_ratio", "YoY_1", "YoY_3", "YoY_5", "YoY_10"
113
+ ]].T,
114
+ "grand_total_over_years": target_month_df.set_index("year")[[
115
+ "grand_total", "grand_total_increment_ratio", "grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5", "grand_total_YoY_10"
116
+ ]].T,
117
+ "recent_month_revenue": self._get_recent_growth(monthly_data, grand_total_dict=annual_total_df.to_dict(), interval=12)
118
+ }
122
119
  )
123
120
 
121
+ fetched_data.pop("monthly_data")
124
122
  return fetched_data
125
123
 
126
- def get_recent_revenue_grwoth(self, monthly_data, grand_total_dict, interval: int = 12):
127
- recent_month_data = monthly_data[:interval + 1]
124
+ def _get_recent_growth(self, monthly_data, grand_total_dict, interval=12):
125
+ last_month_data = monthly_data[1:interval + 1] + [{}] * max(0, interval - len(monthly_data) + 1)
128
126
 
129
127
  MoMs = [
130
- YoY_Calculator.cal_growth(this_value['revenue'], last_value['revenue'], delta = 1)
131
- for this_value, last_value in zip(
132
- recent_month_data[:12], recent_month_data[1:13]
133
- )
128
+ YoY_Calculator.cal_growth(this.get('revenue'), last.get('revenue'), delta = 1)
129
+ for this, last in zip(monthly_data[:interval], last_month_data[:interval])
134
130
  ]
135
131
 
136
- recent_month_data = {
137
- "date" : [f"{data['year']}/{data['month']}" for data in recent_month_data[:interval]],
138
- "revenue" : [data['revenue'] for data in recent_month_data[:interval]],
139
- "MoM" : [f"{(data * 100):.2f}%" for data in MoMs],
140
- "YoY" : [f"{data['revenue_increment_ratio']}" for data in recent_month_data[:interval]],
141
- "total_YoY": [f"{data['grand_total_increment_ratio']}" for data in recent_month_data[:interval]],
142
- }
143
-
144
- # accum_YoY
145
- # accum_YoY 為 Davis提出的定義
146
- # 2024/6的累計YoY(accum_YoY) 為 2024累計到6月為止的總營收/2023年度總營收
147
- accum_YoYs = []
148
- for data in monthly_data[:interval]:
132
+ def safe_accum_yoy(data):
149
133
  try:
150
134
  year = data['year'] - 1
151
135
  total = grand_total_dict[year][12]
152
- accum_YoY = round(((data['grand_total'] - total) / total) * 100, 2)
153
- accum_YoYs.append(f"{accum_YoY}%")
154
- except Exception as e:
155
- accum_YoYs.append(None)
156
-
157
- recent_month_data['accum_YoY'] = accum_YoYs
136
+ grand_total = data.get('grand_total')
137
+ return f"{round(((grand_total - total) / total) * 100, 2)}%"
138
+ except Exception:
139
+ self.logger.debug(f"accum_YoY calc failed for year={data.get('year')} / ticker={self.ticker}", exc_info=True)
140
+ return None
158
141
 
159
- recent_month_df = pd.DataFrame(recent_month_data).set_index('date').T
142
+ recent_month_data = {
143
+ "date": [f"{d.get('year', 0)}/{d.get('month', 0)}" for d in monthly_data[:interval]],
144
+ "revenue": [d.get('revenue') for d in monthly_data[:interval]],
145
+ "MoM": [f"{(m * 100):.2f}%" if isinstance(m, float) else None for m in MoMs],
146
+ "YoY": [d.get('revenue_increment_ratio') for d in monthly_data[:interval]],
147
+ "total_YoY": [d.get('grand_total_increment_ratio') for d in monthly_data[:interval]],
148
+ # accum_YoY
149
+ # accum_YoY 為 Davis提出的定義
150
+ # 2024/6的累計YoY(accum_YoY) 為 2024累計到6月為止的總營收/2023年度總營收
151
+ "accum_YoY": [safe_accum_yoy(d) for d in monthly_data[:interval]]
152
+ }
160
153
 
161
- return recent_month_df
154
+ df = pd.DataFrame(recent_month_data)
155
+ return df[df['date'] != "0/0"].set_index('date').T
156
+
157
+
158
+ def _get_empty_structure(self, target_year, target_month):
159
+ """
160
+ Exception 發生時回傳
161
+ """
162
+ recent_date = [f"{target_year}/{target_month}"]
163
+ for _ in range(11):
164
+ target_year, target_month = (target_year - 1, 12) if target_month == 1 else (target_year, target_month - 1)
165
+ recent_date.append(f"{target_year}/{target_month}")
166
+
167
+ def empty_df(index, columns):
168
+ return pd.DataFrame(index=index, columns=columns)
169
+
170
+ return {
171
+ "month_revenue": empty_df(
172
+ index=pd.Index(['grand_total', 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], dtype='object', name='month'),
173
+ columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype=object, name='year')
174
+ ),
175
+ "this_month_revenue_over_years": empty_df(
176
+ index=pd.Index(['revenue', 'revenue_increment_ratio', 'YoY_1', 'YoY_3', 'YoY_5', 'YoY_10'], dtype='object'),
177
+ columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')
178
+ ),
179
+ "grand_total_over_years": empty_df(
180
+ index=pd.Index(['grand_total', 'grand_total_increment_ratio', 'grand_total_YoY_1', 'grand_total_YoY_3', 'grand_total_YoY_5', 'grand_total_YoY_10'], dtype='object'),
181
+ columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')
182
+ ),
183
+ "recent_month_revenue": empty_df(
184
+ index=pd.Index(['revenue', 'MoM', 'YoY', 'total_YoY', 'accum_YoY'], dtype='object'),
185
+ columns=pd.Index([], dtype = 'object', name = 'date')
186
+ )
187
+ }
188
+
189
def _get_target_year_and_month(self):
    """
    Resolve the (year, month) pair that the revenue query should target.

    Reads ``last_update_time -> monthly_data`` from
    ``StatsDateTime.get_latest_time`` for this ticker and collection. If
    that lookup raises for any reason, falls back to the year and month of
    ``StatsDateTime.get_today()``.

    Returns:
        A ``(year, month)`` tuple.
    """
    try:
        update_info = StatsDateTime.get_latest_time(self.ticker, self.collection)
        monthly_info = update_info['last_update_time']['monthly_data']
        year = monthly_info['latest_year']
        month = monthly_info['latest_month']
    except Exception:
        # Lookup failed or the expected keys are absent: use today's date.
        now = StatsDateTime.get_today()
        return now.year, now.month
    else:
        return year, month