neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. neurostats_API/__init__.py +1 -1
  2. neurostats_API/fetchers/balance_sheet.py +138 -111
  3. neurostats_API/fetchers/base.py +89 -74
  4. neurostats_API/fetchers/cash_flow.py +120 -111
  5. neurostats_API/fetchers/finance_overview.py +2 -2
  6. neurostats_API/fetchers/month_revenue.py +1 -1
  7. neurostats_API/fetchers/profit_lose.py +188 -113
  8. neurostats_API/fetchers/tech.py +175 -42
  9. neurostats_API/fetchers/tej_finance_report.py +230 -335
  10. neurostats_API/tools/company_list/tw.json +2175 -0
  11. neurostats_API/tools/tej_db/tej_db_skip_index.yaml +3 -1
  12. neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -1
  13. neurostats_API/utils/__init__.py +0 -1
  14. neurostats_API/utils/calculate_value.py +99 -1
  15. neurostats_API/utils/data_process.py +43 -15
  16. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/METADATA +2 -2
  17. neurostats_API-0.0.23b0.dist-info/RECORD +34 -0
  18. neurostats_API/utils/fetcher.py +0 -1056
  19. neurostats_API-0.0.21b0.dist-info/RECORD +0 -34
  20. /neurostats_API/tools/{balance_sheet.yaml → twse/balance_sheet.yaml} +0 -0
  21. /neurostats_API/tools/{cash_flow_percentage.yaml → twse/cash_flow_percentage.yaml} +0 -0
  22. /neurostats_API/tools/{finance_overview_dict.yaml → twse/finance_overview_dict.yaml} +0 -0
  23. /neurostats_API/tools/{profit_lose.yaml → twse/profit_lose.yaml} +0 -0
  24. /neurostats_API/tools/{seasonal_data_field_dict.txt → twse/seasonal_data_field_dict.txt} +0 -0
  25. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/WHEEL +0 -0
  26. {neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/top_level.txt +0 -0
@@ -18,34 +18,39 @@ class FinanceReportFetcher(BaseTEJFetcher):
18
18
  QOQ_NOCAL = 4
19
19
 
20
20
  def __init__(
21
- self,
22
- mongo_uri,
23
- db_name="company",
24
- collection_name="TWN/AINVFQ1"
25
- ):
21
+ self, mongo_uri, db_name="company", collection_name="TWN/AINVFQ1"
22
+ ):
26
23
  self.client = MongoClient(mongo_uri)
27
24
  self.db = self.client[db_name]
28
25
  self.collection = self.db[collection_name]
29
26
 
30
- index_dict = StatsProcessor.load_yaml("tej_db/tej_db_index.yaml")
31
- thousand_dict = StatsProcessor.load_yaml("tej_db/tej_db_thousand_index.yaml")
32
- percent_dict = StatsProcessor.load_yaml("tej_db/tej_db_percent_index.yaml")
33
- skip_dict = StatsProcessor.load_yaml("tej_db/tej_db_percent_index.yaml")
34
- self.check_index = set(index_dict[collection_name])
35
- self.skip_index = set(skip_dict[collection_name])
27
+ index_files = [
28
+ "tej_db/tej_db_index.yaml", "tej_db/tej_db_thousand_index.yaml",
29
+ "tej_db/tej_db_percent_index.yaml"
30
+ ]
36
31
 
37
- self.thousand_index_list = list(thousand_dict[collection_name])
38
- self.percent_index_list = list(percent_dict[collection_name])
32
+ self.index_dict, self.thousand_dict, self.percent_dict = [
33
+ StatsProcessor.load_yaml(file) for file in index_files
34
+ ]
39
35
 
36
+ self.check_index = set(self.index_dict.get(collection_name, []))
37
+ self.skip_index = set(self.percent_dict.get(collection_name, []))
38
+ self.thousand_index_list = list(
39
+ self.thousand_dict.get(collection_name, [])
40
+ )
41
+ self.percent_index_list = list(
42
+ self.percent_dict.get(collection_name, [])
43
+ )
40
44
 
41
45
  def get(
42
- self,
43
- ticker,
44
- fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
45
- start_date: str = None,
46
- end_date: str = None,
47
- report_type: str = "Q",
48
- indexes: list = []):
46
+ self,
47
+ ticker,
48
+ fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
49
+ start_date: str = None,
50
+ end_date: str = None,
51
+ report_type: str = "Q",
52
+ indexes: list = []
53
+ ):
49
54
  """
50
55
  基礎的query function
51
56
  ticker(str): 股票代碼
@@ -58,351 +63,239 @@ class FinanceReportFetcher(BaseTEJFetcher):
58
63
  indexes(List): 指定的index
59
64
  """
60
65
  # 確認indexes中是否有錯誤的index,有的話回傳warning
61
- if (indexes and self.check_index):
62
- indexes = set(indexes)
63
- difference = indexes - self.check_index
64
- if (difference):
66
+ if indexes and self.check_index:
67
+ invalid_indexes = set(indexes) - self.check_index
68
+ if invalid_indexes:
65
69
  warnings.warn(
66
- f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確",
67
- UserWarning)
68
-
69
- if (not start_date):
70
- start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
71
- else:
72
- start_date = datetime.strptime(start_date, "%Y-%m-%d")
73
-
74
- if (fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}):
75
-
76
- if (not end_date):
77
- end_date = datetime.today()
78
- else:
79
- end_date = datetime.strptime(end_date, "%Y-%m-%d")
80
-
81
- assert (start_date <= end_date)
82
-
83
- start_year = start_date.year
84
- start_season = (start_date.month - 1) // 4 + 1
85
- end_year = end_date.year
86
- end_season = (end_date.month - 1) // 4 + 1
87
-
88
- if (fetch_mode == self.FetchMode.QOQ):
89
- use_cal = True
90
- else:
91
- use_cal = False
92
-
93
- data_df = self.get_QoQ_data(
70
+ f"{list(invalid_indexes)} 不存在,請確認欄位名稱", UserWarning
71
+ )
72
+
73
+ start_date = datetime.strptime(
74
+ start_date, "%Y-%m-%d"
75
+ ) if start_date else datetime(2005, 1, 1)
76
+
77
+ if fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}:
78
+ end_date = datetime.strptime(end_date, "%Y-%m-%d"
79
+ ) if end_date else datetime.today()
80
+ assert start_date <= end_date
81
+ start_year, end_year = start_date.year, end_date.year
82
+ return self.get_QoQ_data(
94
83
  ticker=ticker,
95
- start_year=start_year,
96
- start_season=start_season,
97
- end_year=end_year,
98
- end_season=end_season,
84
+ start_date=start_date,
85
+ end_date=end_date,
99
86
  report_type=report_type,
100
87
  indexes=indexes,
101
- use_cal=use_cal)
102
-
103
- return data_df
104
-
105
- elif (fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}):
106
- start_year = start_date.year
107
- end_date = self.get_latest_data_time(ticker)
108
- if (not end_date):
109
- end_date = datetime.today()
110
-
111
- end_year = end_date.year
112
- season = (end_date.month - 1) // 4 + 1
88
+ use_cal=(fetch_mode == self.FetchMode.QOQ)
89
+ )
113
90
 
114
- if (fetch_mode == self.FetchMode.YOY):
115
- use_cal = True
116
- else:
117
- use_cal = False
118
-
119
- data_df = self.get_YoY_data(
91
+ elif fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}:
92
+ end_date = self.get_latest_data_time(ticker) or datetime.today()
93
+ start_year, end_year = start_date.year, end_date.year
94
+ end_season = (end_date.month - 1) // 4 + 1
95
+ return self.get_YoY_data(
120
96
  ticker=ticker,
121
97
  start_year=start_year,
122
98
  end_year=end_year,
123
- season=season,
99
+ season=end_season,
124
100
  report_type=report_type,
125
101
  indexes=indexes,
126
- use_cal=use_cal)
127
-
128
- return data_df
102
+ use_cal=(fetch_mode == self.FetchMode.YOY)
103
+ )
129
104
 
130
105
  def get_QoQ_data(
131
- self,
132
- ticker,
133
- start_year,
134
- start_season,
135
- end_year,
136
- end_season,
137
- report_type="Q",
138
- indexes=[],
139
- use_cal=False):
106
+ self,
107
+ ticker,
108
+ start_date,
109
+ end_date,
110
+ report_type="Q",
111
+ indexes=[],
112
+ use_cal=False
113
+ ):
140
114
  """
141
115
  取得時間範圍內每季資料
142
116
  """
143
- if (use_cal):
144
- if (start_season == 1):
145
- lower_bound_year = start_year - 1
146
- lower_bound_season = 4
147
-
148
- else:
149
- lower_bound_year = start_year
150
- lower_bound_season = start_season - 1
151
-
152
- else:
153
- lower_bound_year = start_year,
154
- lower_bound_season = start_season
155
-
156
- if (not indexes): # 沒有指定 -> 取全部
157
- pipeline = [
158
- {
159
- "$match": {
160
- "ticker": ticker
161
- }
162
- }, {
163
- "$unwind": "$data"
164
- }, {
165
- "$match": {
166
- "$or": [
167
- {
168
- "data.year": {
169
- "$gt": start_year,
170
- "$lt": end_year
171
- }
172
- }, {
173
- "data.year": start_year,
174
- "data.season": {
175
- "$gte": start_season
176
- }
177
- }, {
178
- "data.year": end_year,
179
- "data.season": {
180
- "$lte": end_season
181
- }
182
- }, {
183
- "data.year": lower_bound_year,
184
- "data.season": lower_bound_season
185
- }
186
- ]
187
- }
188
- }, {
189
- "$project": {
190
- "data.year": 1,
191
- "data.season": 1,
192
- f"data.{report_type}": 1,
193
- "_id": 0
194
- }
195
- }
196
- ]
197
-
198
- else: # 取指定index
199
- project_stage = {"data.year": 1, "data.season": 1}
200
- for index in indexes:
201
- project_stage[f"data.{report_type}.{index}"] = 1
117
+ start_year, start_season = start_date.year, (
118
+ start_date.month - 1
119
+ ) // 4 + 1
120
+ end_year, end_season = end_date.year, (end_date.month - 1) // 4 + 1
121
+ lower_bound_year, lower_bound_season = (
122
+ start_year - 1, 4
123
+ ) if start_season == 1 else (start_year, start_season - 1)
124
+
125
+ pipeline = self.build_pipeline(
126
+ ticker, start_year, start_season, end_year, end_season,
127
+ lower_bound_year, lower_bound_season, report_type, indexes
128
+ )
129
+ fetched_data = self.collection.aggregate(pipeline).to_list()
202
130
 
203
- pipeline = [
204
- {
205
- "$match": {
206
- "ticker": ticker
207
- }
208
- }, {
209
- "$unwind": "$data"
210
- }, {
211
- "$match": {
212
- "$or": [
213
- {
214
- "data.year": {
215
- "$gt": start_year,
216
- "$lt": end_year
217
- }
218
- }, {
219
- "data.year": start_year,
220
- "data.season": {
221
- "$gte": start_season
222
- }
223
- }, {
224
- "data.year": end_year,
225
- "data.season": {
226
- "$lte": end_season
227
- }
228
- }, {
229
- "data.year": lower_bound_year,
230
- "data.season": lower_bound_season
231
- }
232
- ]
233
- }
234
- }, {
235
- "$project": project_stage
236
- }
237
- ]
131
+ data_dict = self.transform_value(
132
+ StatsProcessor.list_of_dict_to_dict(
133
+ data_list=fetched_data,
134
+ keys=["year", "season"],
135
+ delimeter="Q",
136
+ data_key=report_type
137
+ )
138
+ )
238
139
 
239
- fetched_data = self.collection.aggregate(pipeline).to_list()
240
- data_dict = StatsProcessor.list_of_dict_to_dict(
241
- fetched_data,
242
- keys=["year", "season"],
243
- delimeter="Q",
244
- data_key=report_type)
245
-
246
- data_dict = self.transform_value(data_dict)
247
-
248
- if (use_cal):
249
- data_with_QoQ = self.cal_QoQ(data_dict)
250
- data_df = pd.DataFrame.from_dict(data_with_QoQ)
251
- data_df = data_df.iloc[:, 1:]
252
- data_df = data_df.iloc[:, ::-1].T
253
- data_dict = data_df.to_dict()
254
- data_dict = self.get_dict_of_df(data_dict)
255
- return data_dict
256
- else:
257
- data_df = pd.DataFrame.from_dict(data_dict)
258
- data_df = data_df.iloc[:, ::-1]
259
- return data_df
140
+ return self.calculate_and_format(data_dict, use_cal, self.cal_QoQ)
260
141
 
261
142
  def get_YoY_data(
262
- self,
263
- ticker,
264
- start_year,
265
- end_year,
266
- season,
267
- report_type="Q",
268
- indexes=[],
269
- use_cal=False):
143
+ self,
144
+ ticker,
145
+ start_year,
146
+ end_year,
147
+ season,
148
+ report_type="Q",
149
+ indexes=[],
150
+ use_cal=False
151
+ ):
270
152
  """
271
153
  取得某季歷年資料
272
154
  """
273
- if (use_cal):
274
- select_year = set()
275
-
276
- for year in range(start_year, end_year + 1):
277
- year_shifts = {year, year - 1, year - 3, year - 5, year - 10}
278
-
279
- select_year = select_year.union(year_shifts)
280
-
281
- select_year = sorted(list(select_year), reverse=True)
282
- else:
283
- select_year = [year for year in range(start_year, end_year + 1)]
284
-
285
- if (not indexes): # 沒有指定 -> 取全部
286
- pipeline = [
287
- {
288
- "$match": {
289
- "ticker": ticker
290
- }
291
- }, {
292
- "$unwind": "$data"
293
- }, {
294
- "$match": {
295
- "$or": [
296
- {
297
- "$and": [
298
- {
299
- "data.year": {
300
- "$in": select_year
301
- }
302
- }, {
303
- "data.season": {
304
- "$eq": season
305
- }
306
- }
307
- ]
308
- },
309
- ]
310
- }
311
- }, {
312
- "$project": {
313
- "data.year": 1,
314
- "data.season": 1,
315
- f"data.{report_type}": 1,
316
- "_id": 0
317
- }
318
- }
319
- ]
320
-
321
- else: # 取指定index
322
- project_stage = {"data.year": 1, "data.season": 1}
323
- for index in indexes:
324
- project_stage[f"data.{report_type}.{index}"] = 1
325
-
326
- pipeline = [
327
- {
328
- "$match": {
329
- "ticker": ticker
330
- }
331
- }, {
332
- "$unwind": "$data"
333
- }, {
334
- "$match": {
335
- "$and": [
336
- {
337
- "data.year": {
338
- "$in": select_year
339
- }
340
- }, {
341
- "data.season": {
342
- "$eq": season
343
- }
344
- }
345
- ]
346
- }
347
- }, {
348
- "$project": project_stage
349
- }
350
- ]
351
-
155
+ select_year = sorted(
156
+ {year
157
+ for year in range(start_year, end_year + 1)} | {
158
+ y
159
+ for year in range(start_year, end_year + 1)
160
+ for y in {year, year - 1, year - 3, year - 5, year - 10}
161
+ }
162
+ ) if use_cal else list(range(start_year, end_year + 1))
163
+
164
+ pipeline = self.build_pipeline(
165
+ ticker,
166
+ select_year,
167
+ season,
168
+ None,
169
+ None,
170
+ None,
171
+ None,
172
+ report_type,
173
+ indexes,
174
+ year_based=True
175
+ )
352
176
  fetched_data = self.collection.aggregate(pipeline).to_list()
177
+ data_dict = self.transform_value(
178
+ StatsProcessor.list_of_dict_to_dict(
179
+ fetched_data, ["year", "season"], "Q", report_type
180
+ )
181
+ )
182
+
183
+ return self.calculate_and_format(
184
+ data_dict, use_cal,
185
+ lambda x: self.cal_YoY(x, start_year, end_year, season)
186
+ )
353
187
 
354
- # 處理計算YoY
355
- data_dict = StatsProcessor.list_of_dict_to_dict(
356
- fetched_data,
357
- keys=['year', 'season'],
358
- data_key=report_type,
359
- delimeter='Q')
360
-
361
- data_dict = self.transform_value(data_dict)
362
-
363
- if (use_cal):
364
- data_with_YoY = self.cal_YoY(
365
- data_dict, start_year, end_year, season)
366
- data_df = pd.DataFrame.from_dict(data_with_YoY)
367
- data_df = data_df.iloc[:, ::-1].T
368
- data_dict = data_df.to_dict()
369
- data_dict = self.get_dict_of_df(data_dict)
370
- return data_dict
371
- else:
372
- data_df = pd.DataFrame.from_dict(data_dict)
373
- data_df = data_df.iloc[:, ::-1]
374
- return data_df
375
-
376
188
  def transform_value(self, data_dict):
377
189
  """
378
190
  處理千元, %等單位
379
191
  """
380
192
 
381
193
  data_df = pd.DataFrame.from_dict(data_dict)
382
-
383
- process_set = set(data_df.index).intersection(set(self.thousand_index_list))
384
- process_list = list(process_set)
385
- data_df.loc[process_list] = data_df.loc[process_list].map(
386
- lambda x : StatsProcessor.cal_non_percentage(x, postfix="千元")
387
- )
194
+ for category, postfix in [(self.thousand_index_list, "千元"),
195
+ (self.percent_index_list, "%")]:
196
+ process_list = list(set(data_df.index) & set(category))
197
+ if postfix == "%":
198
+ data_df.loc[process_list] = data_df.loc[process_list].astype(
199
+ str
200
+ ).map(lambda x: f"{x}%")
201
+ else:
202
+ data_df.loc[process_list] = data_df.loc[process_list].map(
203
+ lambda x: StatsProcessor.
204
+ cal_non_percentage(x, postfix=postfix)
205
+ )
206
+ return data_df.to_dict()
207
+
208
+ def build_pipeline(
209
+ self,
210
+ ticker,
211
+ start_year,
212
+ start_season,
213
+ end_year,
214
+ end_season,
215
+ lower_bound_year,
216
+ lower_bound_season,
217
+ report_type,
218
+ indexes,
219
+ year_based=False
220
+ ):
221
+ project_stage = {
222
+ "_id": 0,
223
+ "data.year": 1,
224
+ "data.season": 1,
225
+ **{
226
+ f"data.{report_type}.{idx}": 1
227
+ for idx in indexes
228
+ }
229
+ } if indexes else {
230
+ "_id": 0,
231
+ "data.year": 1,
232
+ "data.season": 1,
233
+ f"data.{report_type}": 1
234
+ }
235
+
236
+ if (year_based):
237
+ match_stage = {
238
+ "data.year": {
239
+ "$in": start_year
240
+ } if year_based else {
241
+ "$gt": start_year,
242
+ "$lt": end_year
243
+ },
244
+ "data.season": end_season
245
+ }
246
+ else:
247
+ match_stage = {
248
+ "$or": [
249
+ {
250
+ "data.year": {
251
+ "$gt": start_year,
252
+ "$lt": end_year
253
+ }
254
+ }, {
255
+ "data.year": start_year,
256
+ "data.season": {
257
+ "$gte": start_season
258
+ }
259
+ }, {
260
+ "data.year": end_year,
261
+ "data.season": {
262
+ "$lte": end_season
263
+ }
264
+ }, {
265
+ "data.year": lower_bound_year,
266
+ "data.season": lower_bound_season
267
+ }
268
+ ]
269
+ }
388
270
 
389
- process_set = set(data_df.index).intersection(set(self.percent_index_list))
390
- process_list = list(process_set)
391
- data_df.loc[process_list] = data_df.loc[process_list].map(
392
- lambda x : f"{x}%"
271
+ return [
272
+ {
273
+ "$match": {
274
+ "ticker": ticker
275
+ }
276
+ }, {
277
+ "$unwind": "$data"
278
+ }, {
279
+ "$match": match_stage
280
+ }, {
281
+ "$project": project_stage
282
+ }
283
+ ]
284
+
285
+ def calculate_and_format(self, data_dict, use_cal, calc_function):
286
+ data_df = pd.DataFrame.from_dict(
287
+ calc_function(data_dict) if use_cal else data_dict
288
+ ).iloc[:, ::-1]
289
+ return data_df if not use_cal else self.get_dict_of_df(
290
+ data_df.T.to_dict()
393
291
  )
394
292
 
395
- data_dict = data_df.to_dict()
396
-
397
- return data_dict
398
293
 
399
294
  class TEJStockPriceFetcher(BaseTEJFetcher):
400
295
 
401
296
  def __init__(
402
- self,
403
- mongo_uri,
404
- db_name: str = "company",
405
- collection_name: str = None):
297
+ self, mongo_uri, db_name: str = "company", collection_name: str = None
298
+ ):
406
299
  self.mongo_uri = mongo_uri
407
300
  self.db_name = db_name
408
301
  self.collection_name = collection_name
@@ -411,14 +304,16 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
411
304
  self.db = self.client[self.db_name]
412
305
  self.collection = self.db[self.collection_name]
413
306
 
414
- self.check_period = ['1d', '7d', '1m', '3m', '1y', '3y', '5y', '10y', 'all']
307
+ self.check_period = [
308
+ '1d', '7d', '1m', '3m', '1y', '3y', '5y', '10y', 'all'
309
+ ]
415
310
 
416
311
  def get(
417
- self,
418
- ticker: str = "2330",
419
- start_date: str = "2024-10-01",
420
- period: str = None
421
- ):
312
+ self,
313
+ ticker: str = "2330",
314
+ start_date: str = "2024-10-01",
315
+ period: str = None
316
+ ):
422
317
  """
423
318
  取得開高低收資料
424
319
  start_date: str: 起始的日期
@@ -463,4 +358,4 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
463
358
 
464
359
  data_df = pd.DataFrame(elements).set_index('mdate')
465
360
 
466
- return data_df
361
+ return data_df