neurostats-API 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurostats_API/__init__.py +1 -1
- neurostats_API/fetchers/balance_sheet.py +64 -40
- neurostats_API/fetchers/base.py +6 -2
- neurostats_API/fetchers/cash_flow.py +91 -70
- neurostats_API/fetchers/finance_overview.py +26 -26
- neurostats_API/fetchers/institution.py +80 -7
- neurostats_API/fetchers/macro_daily_event.py +8 -0
- neurostats_API/fetchers/margin_trading.py +0 -3
- neurostats_API/fetchers/month_revenue.py +139 -105
- neurostats_API/fetchers/profit_lose.py +103 -83
- neurostats_API/fetchers/tech.py +0 -58
- neurostats_API/fetchers/tej_finance_report.py +101 -69
- neurostats_API/fetchers/value_invest.py +26 -9
- neurostats_API/utils/calculate_value.py +5 -2
- neurostats_API/utils/data_process.py +12 -6
- neurostats_API/utils/logger.py +21 -0
- {neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/METADATA +2 -2
- neurostats_API-0.0.24.post1.dist-info/RECORD +36 -0
- neurostats_API-0.0.23b2.dist-info/RECORD +0 -34
- {neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/WHEEL +0 -0
- {neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,7 @@ class InstitutionFetcher(StatsFetcher):
|
|
19
19
|
|
20
20
|
def __init__(self, ticker, db_client):
    """
    Initialise the fetcher through the parent constructor and keep a handle
    on the TEJ collection that is used as a fallback data source.

    Args:
        ticker: forwarded unchanged to the parent constructor.
        db_client: forwarded unchanged to the parent constructor.
    """
    super().__init__(ticker, db_client)
    self.tej_collection = self.db['TWN/APISHRACT']  # TEJ fallback collection
|
22
23
|
|
23
24
|
def prepare_query(self, start_date, end_date):
|
24
25
|
pipeline = super().prepare_query()
|
@@ -107,9 +108,6 @@ class InstitutionFetcher(StatsFetcher):
|
|
107
108
|
hour=0, minute=0, second=0, microsecond=0
|
108
109
|
)
|
109
110
|
except Exception as e:
|
110
|
-
print(
|
111
|
-
f"No updated time for institution_trading in {self.ticker}, use current time instead"
|
112
|
-
)
|
113
111
|
end_date = datetime.now(self.timezone)
|
114
112
|
end_date = end_date.replace(
|
115
113
|
hour=0, minute=0, second=0, microsecond=0
|
@@ -123,14 +121,22 @@ class InstitutionFetcher(StatsFetcher):
|
|
123
121
|
fetched_data = self.collect_data(start_date, end_date)
|
124
122
|
|
125
123
|
fetched_data['daily_data'] = sorted(
|
126
|
-
fetched_data[
|
124
|
+
fetched_data.get("daily_data", []), key=lambda x: x['date'], reverse=True
|
127
125
|
)
|
128
126
|
|
127
|
+
if (not fetched_data['institution_trading']):
|
128
|
+
# 找 TEJ 備援
|
129
|
+
fetched_data.update(
|
130
|
+
{
|
131
|
+
'institution_trading': self.collect_tej(start_date, end_date)
|
132
|
+
}
|
133
|
+
)
|
134
|
+
|
129
135
|
fetched_data['institution_trading'] = sorted(
|
130
|
-
fetched_data
|
136
|
+
fetched_data.get('institution_trading', []),
|
131
137
|
key=lambda x: x['date'],
|
132
138
|
reverse=True
|
133
|
-
) if
|
139
|
+
) if fetched_data['institution_trading'] else None
|
134
140
|
|
135
141
|
table_dict = self.process_data(fetched_data)
|
136
142
|
|
@@ -227,7 +233,6 @@ class InstitutionFetcher(StatsFetcher):
|
|
227
233
|
table_dict['annual_trading'] = self.process_annual_trading(
|
228
234
|
annual_dates, annual_trading_skip
|
229
235
|
)
|
230
|
-
|
231
236
|
|
232
237
|
return table_dict
|
233
238
|
|
@@ -343,3 +348,71 @@ class InstitutionFetcher(StatsFetcher):
|
|
343
348
|
"percentage": 0
|
344
349
|
},
|
345
350
|
}
|
351
|
+
|
352
|
+
def collect_tej(self, start_date, end_date):
    """
    Fetch institutional-trading rows from the TEJ fallback collection.

    Runs the pipeline built by ``prepare_tej_query`` against
    ``self.tej_collection``, converts every share-count column from
    thousand-share units to shares, and renames the TEJ field names to the
    Chinese column labels in ``proj_columns``.

    Args:
        start_date: lower bound handed to ``prepare_tej_query``.
        end_date: upper bound handed to ``prepare_tej_query``.

    Returns:
        list[dict]: one record per row. An empty list is returned when no
        document matched or the matched document has no rows in range.
        Missing or non-numeric share counts come back as NaN.
    """
    pipeline = self.prepare_tej_query(start_date, end_date)

    # Iterating the cursor avoids depending on a driver-specific to_list().
    documents = list(self.tej_collection.aggregate(pipeline))
    if not documents:
        return []

    rows = documents[0].get('filtered_data') or []
    if not rows:
        return []

    # TEJ field -> output label. The proprietary-dealer (dlrp_*) labels
    # follow the same buy / sell / net order as the other three groups.
    proj_columns = {
        "mdate": "date",
        "qfii_buy": "外資買進股數",
        "qfii_sell": "外資賣出股數",
        "qfii_ex": "外資買賣超股數",
        "fund_buy": "投信買進股數",
        "fund_sell": "投信賣出股數",
        "fund_ex": "投信買賣超股數",
        "dlrp_buy": "自營商買進股數(自行)",
        "dlrp_sell": "自營商賣出股數(自行)",
        "dlrp_ex": "自營商買賣超股數(自行)",
        "dlrh_buy": "自營商買進股數(避險)",
        "dlrh_sell": "自營商賣出股數(避險)",
        "dlrh_ex": "自營買賣超股數(避險)",
        "tot_ex": "三大法人買賣超股數",
    }
    source_columns = list(proj_columns)
    value_columns = source_columns[1:]  # everything except the date field

    # reindex both orders the columns and creates any that TEJ did not send,
    # so a missing field yields NaN instead of a KeyError.
    result_df = pd.DataFrame(rows).reindex(columns=source_columns)

    # TEJ reports thousands of shares; scale to shares.
    result_df[value_columns] = (
        result_df[value_columns].apply(pd.to_numeric, errors="coerce") * 1000
    )

    result_df = result_df.rename(columns=proj_columns)
    return result_df.to_dict(orient='records')
|
385
|
+
|
386
|
+
def prepare_tej_query(self, start_date, end_date):
    """
    Build the aggregation pipeline for the TEJ fallback collection.

    The pipeline matches the document whose ``ticker`` equals
    ``self.ticker`` and projects ``filtered_data``: the elements of the
    document's ``data`` array whose ``mdate`` lies in
    ``[start_date, end_date]`` (both bounds inclusive).

    Args:
        start_date: inclusive lower bound compared against ``mdate``.
        end_date: inclusive upper bound compared against ``mdate``.

    Returns:
        list[dict]: the two-stage pipeline (``$match`` then ``$project``).
    """
    date_in_range = {
        "$and": [
            {"$gte": ["$$item.mdate", start_date]},
            {"$lte": ["$$item.mdate", end_date]},
        ]
    }

    return [
        {
            "$match": {
                "ticker": {"$eq": self.ticker}
            }
        },
        {
            "$project": {
                "_id": 0,
                "ticker": 1,
                # $filter already yields the kept elements unchanged, so no
                # identity $map is needed around it.
                "filtered_data": {
                    "$filter": {
                        "input": "$data",
                        "as": "item",
                        "cond": date_in_range,
                    }
                },
            }
        },
    ]
|
@@ -121,9 +121,6 @@ class MarginTradingFetcher(StatsFetcher):
|
|
121
121
|
hour=0, minute=0, second=0, microsecond=0
|
122
122
|
)
|
123
123
|
except Exception as e:
|
124
|
-
print(
|
125
|
-
f"No updated time for institution_trading in {self.ticker}, use current time instead"
|
126
|
-
)
|
127
124
|
end_date = datetime.now(self.timezone)
|
128
125
|
end_date = end_date.replace(
|
129
126
|
hour=0, minute=0, second=0, microsecond=0
|
@@ -4,6 +4,8 @@ import pandas as pd
|
|
4
4
|
from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
|
5
5
|
import importlib.resources as pkg_resources
|
6
6
|
import yaml
|
7
|
+
import traceback
|
8
|
+
import logging
|
7
9
|
|
8
10
|
|
9
11
|
class MonthRevenueFetcher(StatsFetcher):
|
@@ -11,151 +13,183 @@ class MonthRevenueFetcher(StatsFetcher):
|
|
11
13
|
iFa.ai: 財務分析 -> 每月營收
|
12
14
|
"""
|
13
15
|
|
14
|
-
def __init__(self, ticker, db_client):
|
16
|
+
def __init__(self, ticker, db_client, logger=None):
    """
    Initialise the fetcher through the parent constructor and attach a logger.

    Args:
        ticker: forwarded unchanged to the parent constructor.
        db_client: forwarded unchanged to the parent constructor.
        logger: logger to use; when falsy, the logger named after this
            module is used instead.
    """
    super().__init__(ticker, db_client)
    if not logger:
        logger = logging.getLogger(__name__)
    self.logger = logger
|
16
19
|
|
17
|
-
def
|
20
|
+
def _prepare_query(self, target_year, target_month):
    """
    Extend the parent pipeline with a projection of the monthly revenue data.

    The added ``$project`` stage keeps ``ticker`` and ``company_name`` and
    returns ``monthly_data`` sorted by year and then month, both descending.

    Args:
        target_year: accepted but not used when building the pipeline.
        target_month: accepted but not used when building the pipeline.

    Returns:
        list: the parent pipeline with the projection stage appended.
    """
    newest_first = {
        "year": -1,
        "month": -1
    }
    projection = {
        "_id": 0,
        "ticker": 1,
        "company_name": 1,
        "monthly_data": {
            "$sortArray": {
                "input": "$monthly_data",
                "sortBy": newest_first
            }
        },
    }

    stages = super().prepare_query()
    stages.append({"$project": projection})
    return stages
|
38
43
|
|
39
44
|
def collect_data(self, target_year, target_month):
    """
    Run the monthly-revenue pipeline and return every resulting document.

    Args:
        target_year: forwarded to ``_prepare_query``.
        target_month: forwarded to ``_prepare_query``.

    Returns:
        The value of ``to_list()`` on the aggregation result.
    """
    stages = self._prepare_query(target_year, target_month)
    cursor = self.collection.aggregate(stages)
    return cursor.to_list()
|
47
50
|
|
48
51
|
def query_data(self):
    """
    Fetch the monthly revenue document and turn it into the result tables.

    Returns:
        dict: the output of ``_process_data`` for the last fetched document.
        When nothing was fetched or processing fails, the placeholder from
        ``_get_empty_structure`` for the same target year and month is
        returned instead.
    """
    target_year, target_month = self._get_target_year_and_month()

    # Query data
    fetched_data = self.collect_data(target_year, target_month)

    try:
        return self._process_data(fetched_data[-1])
    except Exception:
        # Best-effort fallback: record why processing failed, then hand the
        # *unmodified* target period to the placeholder builder so that its
        # year columns are anchored on the requested period.
        self.logger.debug(
            f"{self.ticker}: No monthly revenue data in TWSE mongoDB",
            exc_info=True
        )
        return self._get_empty_structure(target_year, target_month)
|
69
|
+
|
70
|
+
def _process_data(self, fetched_data):
|
63
71
|
|
64
|
-
|
72
|
+
monthly_data = fetched_data.get('monthly_data', [])
|
73
|
+
if not monthly_data:
|
74
|
+
raise ValueError("monthly_data is empty or missing")
|
65
75
|
|
66
|
-
monthly_data = fetched_data['monthly_data']
|
67
76
|
for data in monthly_data:
|
68
77
|
for key, value in data.items():
|
69
|
-
if
|
78
|
+
if "YoY" in key:
|
70
79
|
data[key] = StatsProcessor.cal_percentage(value)
|
71
|
-
elif
|
72
|
-
data[key] = StatsProcessor.cal_non_percentage(value,
|
73
|
-
|
74
|
-
|
75
|
-
elif (key not in ('year', 'month')):
|
76
|
-
data[key] = StatsProcessor.cal_non_percentage(value,
|
77
|
-
postfix="千元")
|
78
|
-
target_month = monthly_data[0]['month']
|
79
|
-
monthly_df = pd.DataFrame(monthly_data)
|
80
|
+
elif "ratio" in key or "percentage" in key:
|
81
|
+
data[key] = StatsProcessor.cal_non_percentage(value, to_str=True, postfix="%")
|
82
|
+
elif key not in ('year', 'month'):
|
83
|
+
data[key] = StatsProcessor.cal_non_percentage(value, postfix="千元")
|
80
84
|
|
85
|
+
monthly_df = pd.DataFrame(monthly_data)
|
86
|
+
target_month = monthly_data[0]['month']
|
81
87
|
target_month_df = monthly_df[monthly_df['month'] == target_month]
|
82
88
|
annual_month_df = monthly_df[monthly_df['month'] == 12]
|
83
|
-
month_revenue_df = monthly_df.pivot(index='month',
|
84
|
-
columns='year',
|
85
|
-
values='revenue')
|
86
|
-
|
87
|
-
grand_total_df = target_month_df.pivot(index='month',
|
88
|
-
columns='year',
|
89
|
-
values='grand_total')
|
90
89
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
grand_total_df.rename(index={target_month: f"grand_total"},
|
96
|
-
inplace=True)
|
90
|
+
|
91
|
+
month_revenue_df = monthly_df.pivot(
|
92
|
+
index='month', columns='year', values='revenue'
|
93
|
+
)
|
97
94
|
month_revenue_df = month_revenue_df.sort_index(ascending=False)
|
98
|
-
month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
|
99
|
-
axis=0)
|
100
|
-
|
101
|
-
fetched_data['month_revenue'] = month_revenue_df[sorted(
|
102
|
-
month_revenue_df.columns, reverse=True)]
|
103
|
-
# 歷年月營收
|
104
|
-
fetched_data[
|
105
|
-
'this_month_revenue_over_years'] = target_month_df.set_index(
|
106
|
-
"year")[[
|
107
|
-
"revenue", "revenue_increment_ratio", "YoY_1", "YoY_3",
|
108
|
-
"YoY_5", "YoY_10"
|
109
|
-
]].T
|
110
|
-
# 歷年營收成長量
|
111
|
-
fetched_data['grand_total_over_years'] = target_month_df.set_index(
|
112
|
-
"year")[[
|
113
|
-
"grand_total", "grand_total_increment_ratio",
|
114
|
-
"grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5",
|
115
|
-
"grand_total_YoY_10"
|
116
|
-
]].T
|
117
95
|
|
118
|
-
|
96
|
+
grand_total_df = target_month_df.pivot(
|
97
|
+
index='month', columns='year', values='grand_total'
|
98
|
+
)
|
99
|
+
grand_total_df.rename(
|
100
|
+
index={target_month: f"grand_total"}, inplace=True
|
101
|
+
)
|
102
|
+
month_revenue_df = pd.concat([grand_total_df, month_revenue_df], axis=0)
|
119
103
|
|
120
|
-
|
121
|
-
|
104
|
+
annual_total_df = annual_month_df.pivot(
|
105
|
+
index='month', columns='year', values='grand_total'
|
106
|
+
)
|
107
|
+
|
108
|
+
fetched_data.update(
|
109
|
+
{
|
110
|
+
"month_revenue": month_revenue_df[sorted(month_revenue_df.columns, reverse=True)],
|
111
|
+
"this_month_revenue_over_years": target_month_df.set_index("year")[[
|
112
|
+
"revenue", "revenue_increment_ratio", "YoY_1", "YoY_3", "YoY_5", "YoY_10"
|
113
|
+
]].T,
|
114
|
+
"grand_total_over_years": target_month_df.set_index("year")[[
|
115
|
+
"grand_total", "grand_total_increment_ratio", "grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5", "grand_total_YoY_10"
|
116
|
+
]].T,
|
117
|
+
"recent_month_revenue": self._get_recent_growth(monthly_data, grand_total_dict=annual_total_df.to_dict(), interval=12)
|
118
|
+
}
|
122
119
|
)
|
123
120
|
|
121
|
+
fetched_data.pop("monthly_data")
|
124
122
|
return fetched_data
|
125
123
|
|
126
|
-
def
|
127
|
-
|
124
|
+
def _get_recent_growth(self, monthly_data, grand_total_dict, interval=12):
|
125
|
+
last_month_data = monthly_data[1:interval + 1] + [{}] * max(0, interval - len(monthly_data) + 1)
|
128
126
|
|
129
127
|
MoMs = [
|
130
|
-
YoY_Calculator.cal_growth(
|
131
|
-
for
|
132
|
-
recent_month_data[:12], recent_month_data[1:13]
|
133
|
-
)
|
128
|
+
YoY_Calculator.cal_growth(this.get('revenue'), last.get('revenue'), delta = 1)
|
129
|
+
for this, last in zip(monthly_data[:interval], last_month_data[:interval])
|
134
130
|
]
|
135
131
|
|
136
|
-
|
137
|
-
"date" : [f"{data['year']}/{data['month']}" for data in recent_month_data[:interval]],
|
138
|
-
"revenue" : [data['revenue'] for data in recent_month_data[:interval]],
|
139
|
-
"MoM" : [f"{(data * 100):.2f}%" for data in MoMs],
|
140
|
-
"YoY" : [f"{data['revenue_increment_ratio']}" for data in recent_month_data[:interval]],
|
141
|
-
"total_YoY": [f"{data['grand_total_increment_ratio']}" for data in recent_month_data[:interval]],
|
142
|
-
}
|
143
|
-
|
144
|
-
# accum_YoY
|
145
|
-
# accum_YoY 為 Davis提出的定義
|
146
|
-
# 2024/6的累計YoY(accum_YoY) 為 2024累計到6月為止的總營收/2023年度總營收
|
147
|
-
accum_YoYs = []
|
148
|
-
for data in monthly_data[:interval]:
|
132
|
+
def safe_accum_yoy(data):
|
149
133
|
try:
|
150
134
|
year = data['year'] - 1
|
151
135
|
total = grand_total_dict[year][12]
|
152
|
-
|
153
|
-
|
154
|
-
except Exception
|
155
|
-
|
156
|
-
|
157
|
-
recent_month_data['accum_YoY'] = accum_YoYs
|
136
|
+
grand_total = data.get('grand_total')
|
137
|
+
return f"{round(((grand_total - total) / total) * 100, 2)}%"
|
138
|
+
except Exception:
|
139
|
+
self.logger.debug(f"accum_YoY calc failed for year={data.get('year')} / ticker={self.ticker}", exc_info=True)
|
140
|
+
return None
|
158
141
|
|
159
|
-
|
142
|
+
recent_month_data = {
|
143
|
+
"date": [f"{d.get('year', 0)}/{d.get('month', 0)}" for d in monthly_data[:interval]],
|
144
|
+
"revenue": [d.get('revenue') for d in monthly_data[:interval]],
|
145
|
+
"MoM": [f"{(m * 100):.2f}%" if isinstance(m, float) else None for m in MoMs],
|
146
|
+
"YoY": [d.get('revenue_increment_ratio') for d in monthly_data[:interval]],
|
147
|
+
"total_YoY": [d.get('grand_total_increment_ratio') for d in monthly_data[:interval]],
|
148
|
+
# accum_YoY
|
149
|
+
# accum_YoY 為 Davis提出的定義
|
150
|
+
# 2024/6的累計YoY(accum_YoY) 為 2024累計到6月為止的總營收/2023年度總營收
|
151
|
+
"accum_YoY": [safe_accum_yoy(d) for d in monthly_data[:interval]]
|
152
|
+
}
|
160
153
|
|
161
|
-
|
154
|
+
df = pd.DataFrame(recent_month_data)
|
155
|
+
return df[df['date'] != "0/0"].set_index('date').T
|
156
|
+
|
157
|
+
|
158
|
+
def _get_empty_structure(self, target_year, target_month):
|
159
|
+
"""
|
160
|
+
Exception 發生時回傳
|
161
|
+
"""
|
162
|
+
recent_date = [f"{target_year}/{target_month}"]
|
163
|
+
for _ in range(11):
|
164
|
+
target_year, target_month = (target_year - 1, 12) if target_month == 1 else (target_year, target_month - 1)
|
165
|
+
recent_date.append(f"{target_year}/{target_month}")
|
166
|
+
|
167
|
+
def empty_df(index, columns):
|
168
|
+
return pd.DataFrame(index=index, columns=columns)
|
169
|
+
|
170
|
+
return {
|
171
|
+
"month_revenue": empty_df(
|
172
|
+
index=pd.Index(['grand_total', 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], dtype='object', name='month'),
|
173
|
+
columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype=object, name='year')
|
174
|
+
),
|
175
|
+
"this_month_revenue_over_years": empty_df(
|
176
|
+
index=pd.Index(['revenue', 'revenue_increment_ratio', 'YoY_1', 'YoY_3', 'YoY_5', 'YoY_10'], dtype='object'),
|
177
|
+
columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')
|
178
|
+
),
|
179
|
+
"grand_total_over_years": empty_df(
|
180
|
+
index=pd.Index(['grand_total', 'grand_total_increment_ratio', 'grand_total_YoY_1', 'grand_total_YoY_3', 'grand_total_YoY_5', 'grand_total_YoY_10'], dtype='object'),
|
181
|
+
columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')
|
182
|
+
),
|
183
|
+
"recent_month_revenue": empty_df(
|
184
|
+
index=pd.Index(['revenue', 'MoM', 'YoY', 'total_YoY', 'accum_YoY'], dtype='object'),
|
185
|
+
columns=pd.Index([], dtype = 'object', name = 'date')
|
186
|
+
)
|
187
|
+
}
|
188
|
+
|
189
|
+
def _get_target_year_and_month(self):
    """
    Determine which year and month the query should target.

    Returns:
        tuple: ``(latest_year, latest_month)`` read from the
        ``last_update_time`` -> ``monthly_data`` entry returned by
        ``StatsDateTime.get_latest_time``; if that lookup fails for any
        reason, the year and month of ``StatsDateTime.get_today()``.
    """
    try:
        update_info = StatsDateTime.get_latest_time(self.ticker, self.collection)
        latest_time = update_info['last_update_time']
        year = latest_time['monthly_data']['latest_year']
        month = latest_time['monthly_data']['latest_month']
        return year, month
    except Exception:
        now = StatsDateTime.get_today()
        return now.year, now.month
|