neurostats-API 0.0.14__tar.gz → 0.0.15__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/PKG-INFO +61 -2
  2. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/README.md +60 -1
  3. neurostats_API-0.0.15/neurostats_API/__init__.py +13 -0
  4. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/__init__.py +2 -0
  5. neurostats_API-0.0.15/neurostats_API/fetchers/base.py +152 -0
  6. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/tech.py +34 -7
  7. neurostats_API-0.0.15/neurostats_API/fetchers/tej_finance_report.py +313 -0
  8. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/utils/__init__.py +2 -1
  9. neurostats_API-0.0.15/neurostats_API/utils/calculate_value.py +26 -0
  10. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/utils/data_process.py +56 -1
  11. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API.egg-info/PKG-INFO +62 -3
  12. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API.egg-info/SOURCES.txt +5 -1
  13. neurostats_API-0.0.15/neurostats_API.egg-info/requires.txt +6 -0
  14. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/setup.py +7 -1
  15. neurostats_API-0.0.15/test/test_tej.py +26 -0
  16. neurostats_api-0.0.14/neurostats_API/__init__.py +0 -1
  17. neurostats_api-0.0.14/neurostats_API/fetchers/base.py +0 -54
  18. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/MANIFEST.in +0 -0
  19. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/cli.py +0 -0
  20. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/balance_sheet.py +0 -0
  21. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/cash_flow.py +0 -0
  22. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/finance_overview.py +0 -0
  23. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/institution.py +0 -0
  24. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/margin_trading.py +0 -0
  25. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/month_revenue.py +0 -0
  26. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/profit_lose.py +0 -0
  27. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/fetchers/value_invest.py +0 -0
  28. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/main.py +0 -0
  29. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/tools/balance_sheet.yaml +0 -0
  30. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/tools/cash_flow_percentage.yaml +0 -0
  31. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/tools/finance_overview_dict.yaml +0 -0
  32. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/tools/profit_lose.yaml +0 -0
  33. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/tools/seasonal_data_field_dict.txt +0 -0
  34. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/utils/datetime.py +0 -0
  35. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/utils/db_client.py +0 -0
  36. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API/utils/fetcher.py +0 -0
  37. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API.egg-info/dependency_links.txt +0 -0
  38. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/neurostats_API.egg-info/top_level.txt +0 -0
  39. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/setup.cfg +0 -0
  40. {neurostats_api-0.0.14 → neurostats_API-0.0.15}/test/test_fetchers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neurostats_API
3
- Version: 0.0.14
3
+ Version: 0.0.15
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
@@ -83,7 +83,7 @@ pip install neurostats-API
83
83
  ```Python
84
84
  >>> import neurostats_API
85
85
  >>> print(neurostats_API.__version__)
86
- 0.0.14
86
+ 0.0.15
87
87
  ```
88
88
 
89
89
  ### 得到最新一期的評價資料與歷年評價
@@ -667,7 +667,66 @@ fetcher.query()
667
667
 
668
668
  請注意`range`, `last_range`, `52week_range`這三個項目型態為字串,其餘為float
669
669
 
670
+
671
+ ## TEJ 相關
672
+ ### 會計師簽證財務資料
673
+ ```Python
674
+ from neurostats_API import FinanceReportFetcher
675
+
676
+ mongo_uri = <MongoDB 的 URI>
677
+ db_name = 'company' # 連接的DB名稱
678
+ collection_name = "TWN/AINVFQ1" # 連接的collection對象
679
+
680
+ fetcher = FinanceReportFetcher(
681
+ mongo_uri = mongo_uri,
682
+ db_name = db_name,
683
+ collection_name = collection_name
684
+ )
685
+
686
+ data = fetcher.get(
687
+ ticker = "2330", # 任意的股票代碼
688
+ fetch_mode = fetcher.FetchMode.YOY_NOCAL, # 取得模式
689
+ start_date = "2005-01-01",
690
+ end_date = "2024-12-31",
691
+ report_type = "Q",
692
+ indexes = []
693
+ )
694
+ ```
695
+ - `ticker`: 股票代碼
696
+
697
+ - `fetch_mode` : 取得模式,為`fetcher.FetchMode.YOY_NOCAL` 或 `fetcher.FetchMode.QOQ_NOCAL`
698
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date為準同季的歷年資料,時間範圍以start_date為準
699
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
700
+
701
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
702
+
703
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
704
+
705
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
706
+
707
+ - `report_type`: 選擇哪種報告,預設為`Q`
708
+ - `A`: 當年累計
709
+ - `Q`: 當季數值
710
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
711
+
712
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
713
+ - 範例輸入: `['bp41', 'bp51']`
714
+
715
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
716
+ 請看 `會計師簽證財務資料`
717
+
718
+ #### 回傳資料
719
+ fetch_mode設定為`YOY_NOCAL`與`QOQ_NOCAL`下
720
+ 為回傳pd.DataFrame,column名稱為<年份>Q<季>, row名稱為指定財報項目
721
+
670
722
  ## 版本紀錄
723
+ ## 0.0.15
724
+ - TechFetcher中新增指數條件
725
+
726
+ - 新增tej_fetcher索取TEJ相關的資料
727
+
728
+ - package新增dependency,可以安裝需要的相關package
729
+
671
730
  ## 0.0.14
672
731
  - 修改部分財報資料錯誤的乘以1000的問題
673
732
 
@@ -73,7 +73,7 @@ pip install neurostats-API
73
73
  ```Python
74
74
  >>> import neurostats_API
75
75
  >>> print(neurostats_API.__version__)
76
- 0.0.14
76
+ 0.0.15
77
77
  ```
78
78
 
79
79
  ### 得到最新一期的評價資料與歷年評價
@@ -657,7 +657,66 @@ fetcher.query()
657
657
 
658
658
  請注意`range`, `last_range`, `52week_range`這三個項目型態為字串,其餘為float
659
659
 
660
+
661
+ ## TEJ 相關
662
+ ### 會計師簽證財務資料
663
+ ```Python
664
+ from neurostats_API import FinanceReportFetcher
665
+
666
+ mongo_uri = <MongoDB 的 URI>
667
+ db_name = 'company' # 連接的DB名稱
668
+ collection_name = "TWN/AINVFQ1" # 連接的collection對象
669
+
670
+ fetcher = FinanceReportFetcher(
671
+ mongo_uri = mongo_uri,
672
+ db_name = db_name,
673
+ collection_name = collection_name
674
+ )
675
+
676
+ data = fetcher.get(
677
+ ticker = "2330", # 任意的股票代碼
678
+ fetch_mode = fetcher.FetchMode.YOY_NOCAL, # 取得模式
679
+ start_date = "2005-01-01",
680
+ end_date = "2024-12-31",
681
+ report_type = "Q",
682
+ indexes = []
683
+ )
684
+ ```
685
+ - `ticker`: 股票代碼
686
+
687
+ - `fetch_mode` : 取得模式,為`fetcher.FetchMode.YOY_NOCAL` 或 `fetcher.FetchMode.QOQ_NOCAL`
688
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date為準同季的歷年資料,時間範圍以start_date為準
689
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
690
+
691
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
692
+
693
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
694
+
695
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
696
+
697
+ - `report_type`: 選擇哪種報告,預設為`Q`
698
+ - `A`: 當年累計
699
+ - `Q`: 當季數值
700
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
701
+
702
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
703
+ - 範例輸入: `['bp41', 'bp51']`
704
+
705
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
706
+ 請看 `會計師簽證財務資料`
707
+
708
+ #### 回傳資料
709
+ fetch_mode設定為`YOY_NOCAL`與`QOQ_NOCAL`下
710
+ 為回傳pd.DataFrame,column名稱為<年份>Q<季>, row名稱為指定財報項目
711
+
660
712
  ## 版本紀錄
713
+ ## 0.0.15
714
+ - TechFetcher中新增指數條件
715
+
716
+ - 新增tej_fetcher索取TEJ相關的資料
717
+
718
+ - package新增dependency,可以安裝需要的相關package
719
+
661
720
  ## 0.0.14
662
721
  - 修改部分財報資料錯誤的乘以1000的問題
663
722
 
@@ -0,0 +1,13 @@
1
+ __version__='0.0.15'
2
+
3
+ from .fetchers import (
4
+ BalanceSheetFetcher,
5
+ CashFlowFetcher,
6
+ FinanceOverviewFetcher,
7
+ FinanceReportFetcher,
8
+ InstitutionFetcher,
9
+ MarginTradingFetcher,
10
+ MonthRevenueFetcher,
11
+ TechFetcher,
12
+ ProfitLoseFetcher
13
+ )
@@ -2,6 +2,8 @@ from .base import StatsDateTime, StatsFetcher
2
2
  from .balance_sheet import BalanceSheetFetcher
3
3
  from .cash_flow import CashFlowFetcher
4
4
  from .finance_overview import FinanceOverviewFetcher
5
+ from .tej_finance_report import FinanceReportFetcher
6
+ from .tech import TechFetcher
5
7
  from .institution import InstitutionFetcher
6
8
  from .margin_trading import MarginTradingFetcher
7
9
  from .month_revenue import MonthRevenueFetcher
@@ -0,0 +1,152 @@
1
+ import abc
2
+ from pymongo import MongoClient
3
+ import pandas as pd
4
+ import json
5
+ import pytz
6
+ from datetime import datetime, timedelta, date
7
+ from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
8
+ import yaml
9
+
10
class StatsFetcher:
    """Base fetcher bound to a single ticker in the ``twse_stats`` collection.

    The constructor resolves the ``company`` database and its ``twse_stats``
    collection from the supplied client, fixes the working timezone to
    Asia/Taipei and prepares the lookup table that maps a metric-group name
    to the list of field names belonging to that group.
    """

    def __init__(self, ticker, db_client):
        self.ticker = ticker
        self.db = db_client["company"]  # Replace with your database name
        self.collection = self.db["twse_stats"]

        self.timezone = pytz.timezone("Asia/Taipei")

        # Year spans used for the YoY_<n> growth field names.
        yoy_spans = (1, 3, 5, 10)
        self.target_metric_dict = dict(
            value=['value'],
            value_and_percentage=['value', 'percentage'],
            percentage=['percentage'],
            grand_total=['grand_total'],
            grand_total_values=['grand_total', 'grand_total_percentage'],
            grand_total_percentage=['grand_total_percentage'],
            growth=[f'YoY_{span}' for span in yoy_spans],
            grand_total_growth=[f"YoY_{span}" for span in yoy_spans],
        )

    def prepare_query(self):
        """Return the aggregation pipeline selecting this fetcher's ticker."""
        match_stage = {"$match": {"ticker": self.ticker}}
        return [match_stage]

    def collect_data(self, start_date, end_date):
        """Run the prepared pipeline and return the first matching document.

        ``start_date`` and ``end_date`` are accepted but not used by this
        base implementation. Indexing the first result raises ``IndexError``
        when the aggregation returns nothing.
        """
        documents = list(self.collection.aggregate(self.prepare_query()))
        return documents[0]

    def str_to_datetime(self, date_str):
        """Convert a ``YYYY-MM-DD`` string into a ``StatsDateTime``.

        The parsed datetime is localized to ``self.timezone`` and the season
        (quarter, 1-4) is derived from the month.
        """
        year, month, day = map(int, date_str.split("-"))

        localized = self.timezone.localize(
            datetime.strptime(date_str, "%Y-%m-%d")
        )
        quarter = (month - 1) // 3 + 1

        return StatsDateTime(localized, year, month, day, quarter)
55
+
56
+ class BaseTEJFetcher(abc.ABC):
57
+ def __init__(self):
58
+ self.client = None
59
+ self.db = None
60
+ self.collection = None
61
+
62
+ @abc.abstractmethod
63
+ def get(self):
64
+ pass
65
+
66
+ def get_latest_data_time(self, ticker):
67
+ latest_data = self.collection.find_one(
68
+ {"ticker": ticker},
69
+ {"last_update": 1, "_id" : 0}
70
+ )
71
+
72
+ try:
73
+ latest_date = latest_data['last_update']["latest_data_date"]
74
+ except Exception as e:
75
+ latest_date = None
76
+
77
+ return latest_date
78
+
79
+ def cal_YoY(self, data_dict: dict, start_year: int, end_year: int):
80
+ year_shifts = [1,3,5,10]
81
+ return_dict = {}
82
+ for year in range(start_year, end_year+1):
83
+ year_data = data_dict[str(year)]
84
+ year_keys = list(year_data.keys())
85
+ for key in year_keys:
86
+ if (key in 'season'):
87
+ continue
88
+
89
+ if (isinstance(year_data[key], (int, float))):
90
+ temp_dict = {"value": year_data[key]}
91
+
92
+ for shift in year_shifts:
93
+ this_value = year_data[key]
94
+ try:
95
+ past_year = str(year - shift)
96
+ last_value = data_dict[past_year][key]['value']
97
+ temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(
98
+ this_value, last_value, delta = shift
99
+ )
100
+ except Exception as e:
101
+ temp_dict[f"YoY_{shift}"] = None
102
+
103
+ year_data[key] = temp_dict
104
+
105
+ else:
106
+ year_data.pop(key)
107
+
108
+ return_dict[year] = year_data
109
+
110
+
111
+ return return_dict
112
+
113
+ def cal_QoQ(self, data_dict):
114
+ return_dict = {}
115
+ for i, time_index in enumerate(data_dict.keys()):
116
+ year, season = time_index.split("Q")
117
+ year = int(year)
118
+ season = int(season)
119
+ if (season == 1):
120
+ last_year = year - 1
121
+ last_season = 4
122
+ else:
123
+ last_year = year
124
+ last_season = season - 1
125
+
126
+ this_data = data_dict[time_index]
127
+ this_keys = list(this_data.keys())
128
+ for key in this_keys:
129
+ if (key in 'season'):
130
+ continue
131
+
132
+ this_value = this_data[key]
133
+
134
+ if (isinstance(this_value, (int, float))):
135
+ temp_dict = {"value": this_value}
136
+
137
+ try:
138
+ last_value = data_dict[f"{last_year}Q{last_season}"][key]['value']
139
+
140
+ temp_dict['growth'] = YoY_Calculator.cal_growth(
141
+ this_value, last_value, delta=1
142
+ )
143
+ except Exception as e:
144
+ temp_dict['growth'] = None
145
+
146
+ this_data[key] = temp_dict
147
+
148
+ else:
149
+ this_data.pop(key)
150
+ return_dict[time_index] = this_data
151
+ return return_dict
152
+
@@ -1,9 +1,16 @@
1
1
  from .base import StatsFetcher
2
2
  import pandas as pd
3
+ import yfinance as yf
3
4
 
4
5
  class TechFetcher(StatsFetcher):
5
6
 
6
7
  def __init__(self, ticker:str, db_client):
8
+
9
+ """
10
+ The Capitalization-Weighted Index includes the following tickers:
11
+ ['GSPC', 'IXIC', 'DJI', 'TWII']
12
+ """
13
+
7
14
  super().__init__(ticker, db_client)
8
15
  self.full_ohlcv = self._get_ohlcv()
9
16
  self.basic_indexes = ['SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20',
@@ -40,16 +47,36 @@ class TechFetcher(StatsFetcher):
40
47
  )
41
48
 
42
49
  def _get_ohlcv(self):
43
- query = {'ticker': self.ticker}
44
- ticker_full = list(self.collection.find(query))
50
+
51
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
52
+
53
+ full_tick = f'^{self.ticker}'
54
+ yf_ticker = yf.Ticker(full_tick)
55
+ origin_df = yf_ticker.history(period="10y")
56
+ origin_df = origin_df.reset_index()
57
+ origin_df["Date"] = pd.to_datetime(origin_df["Date"]).dt.date
58
+ df = origin_df.rename(
59
+ columns={
60
+ "Date": "date",
61
+ "Open": "open",
62
+ "High": "high",
63
+ "Low": "low",
64
+ "Close": "close",
65
+ "Volume": "volume"
66
+ }
67
+ )
68
+ else:
69
+
70
+ query = {'ticker': self.ticker}
71
+ ticker_full = list(self.collection.find(query))
45
72
 
46
- if not ticker_full:
47
- raise ValueError(f"No data found for ticker: {self.ticker}")
73
+ if not ticker_full:
74
+ raise ValueError(f"No data found for ticker: {self.ticker}")
48
75
 
49
- if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
50
- raise KeyError("Missing 'daily_data' in the retrieved data")
76
+ if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
77
+ raise KeyError("Missing 'daily_data' in the retrieved data")
51
78
 
52
- df = pd.DataFrame(ticker_full[0]['daily_data'])
79
+ df = pd.DataFrame(ticker_full[0]['daily_data'])
53
80
 
54
81
  selected_cols = ['date','open','high','low','close','volume']
55
82
 
@@ -0,0 +1,313 @@
1
+ from .base import BaseTEJFetcher
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ import pandas as pd
5
+ from pymongo import MongoClient
6
+ from ..utils import StatsProcessor, YoY_Calculator
7
+ import warnings
8
+
9
+
10
class FinanceReportFetcher(BaseTEJFetcher):
    """Fetch TEJ financial-report data for one ticker from MongoDB.

    Each ticker document is expected to carry a ``data`` array whose
    elements hold ``year``, ``season`` and one sub-document per report type
    (the pipelines below unwind ``data`` and project
    ``data.<report_type>``).
    """

    class FetchMode(Enum):
        YOY = 1
        QOQ = 2
        YOY_NOCAL = 3
        QOQ_NOCAL = 4

    def __init__(self, mongo_uri, db_name = "company", collection_name = "TWN/AINVFQ1"):
        super().__init__()
        self.client = MongoClient(mongo_uri)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]

        # Column names accepted without a warning by get().
        self.check_index = {
            'coid', 'mdate', 'key3', 'no',
            'sem', 'merg', 'curr', 'annd',
            'fin_ind', 'bp11', 'bp21', 'bp22',
            'bp31', 'bp41', 'bp51', 'bp53',
            'bp61', 'bp62', 'bp63', 'bp64',
            'bp65', 'bf11', 'bf12', 'bf21',
            'bf22', 'bf41', 'bf42', 'bf43',
            'bf44', 'bf45', 'bf99', 'bsca',
            'bsnca', 'bsta','bscl','bsncl',
            'bstl','bsse','bslse','debt',
            'quick','ppe','ar','ip12',
            'ip22','ip31','ip51','iv41',
            'if11','isibt','isni','isnip',
            'eps','ispsd','gm','opi',
            'nri','ri','nopi','ebit',
            'cip31','cscfo','cscfi','cscff',
            'person','shares','wavg','taxrate',
            'r104','r115','r105','r106',
            'r107','r108','r201','r112',
            'r401','r402','r403','r404',
            'r405','r408','r409','r410',
            'r502','r501','r205','r505',
            'r517','r512','r509','r608',
            'r616','r610','r607','r613',
            'r612','r609','r614','r611',
            'r307','r304','r305','r306',
            'r316','r834'
        }

    @staticmethod
    def _to_datetime(value):
        """Return ``value`` as a datetime, parsing ``YYYY-MM-DD`` strings."""
        if isinstance(value, datetime):
            return value
        return datetime.strptime(value, "%Y-%m-%d")

    @staticmethod
    def _season_of(moment):
        """Return the quarter (1-4) that ``moment``'s month falls in."""
        return (moment.month - 1) // 3 + 1

    @staticmethod
    def _project_stage(report_type, indexes):
        """Build the ``$project`` document for the requested columns."""
        if not indexes:
            # Nothing specified -> return the whole report sub-document.
            return {
                "data.year": 1,
                "data.season": 1,
                f"data.{report_type}": 1,
                "_id": 0
            }

        project_stage = {
            "data.year": 1,
            "data.season": 1
        }
        for index in indexes:
            project_stage[f"data.{report_type}.{index}"] = 1
        return project_stage

    @staticmethod
    def _quarter_range_match(start_year, start_season, end_year, end_season):
        """Build a ``$match`` document for start <= (year, season) <= end.

        Both bounds must hold at once; a plain ``$or`` of the two boundary
        conditions would match every season when both fall in the same year.
        """
        return {
            "$and": [
                { "$or": [
                    { "data.year": { "$gt": start_year } },
                    { "data.year": start_year, "data.season": { "$gte": start_season } }
                ]},
                { "$or": [
                    { "data.year": { "$lt": end_year } },
                    { "data.year": end_year, "data.season": { "$lte": end_season } }
                ]}
            ]
        }

    def get(
        self,
        ticker,
        fetch_mode: FetchMode = FetchMode.QOQ,
        start_date: str = None,
        end_date: str = None,
        report_type: str = "Q",
        indexes: list = None
    ):
        """Basic query entry point.

        Args:
            ticker (str): stock ticker.
            fetch_mode (FetchMode):
                QOQ / QOQ_NOCAL: every quarter between ``start_date`` and
                ``end_date``; QOQ additionally computes growth against the
                previous quarter.
                YOY / YOY_NOCAL: the quarter containing ``end_date`` for
                every year from ``start_date``'s year to ``end_date``'s
                year.
            start_date (str): ``YYYY-MM-DD`` start of the range. Defaults to
                2005-01-01.
            end_date (str): ``YYYY-MM-DD`` end of the range. Defaults to
                today for the QOQ modes, and to the ticker's latest data
                date (today if unknown) for the YOY modes.
            report_type (str): one of ``"A"``, ``"Q"``, ``"TTM"``.
            indexes (list): column names to return; empty/None returns all.

        Returns:
            pandas.DataFrame with one column per ``<year>Q<season>``, or
            ``None`` when ``fetch_mode`` is not a known ``FetchMode``.

        Raises:
            ValueError: if a date string is not ``YYYY-MM-DD`` or the start
                date is after the end date.
        """
        indexes = list(indexes) if indexes else []

        # Warn about column names that are not known TEJ columns; they are
        # still requested so the caller sees what the database returns.
        if (indexes and self.check_index):
            difference = set(indexes) - self.check_index
            if (difference):
                warnings.warn(
                    f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確",
                    UserWarning
                )

        if (fetch_mode in {
                self.FetchMode.QOQ,
                self.FetchMode.QOQ_NOCAL
            }
        ):
            if (not start_date):
                warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
                start_date = "2005-01-01"
            if (not end_date):
                warnings.warn("No end_date specified, use default date = today", UserWarning)
                end_date = datetime.today()

            # Parse first, then compare, so defaults and strings mix safely.
            start_date = self._to_datetime(start_date)
            end_date = self._to_datetime(end_date)
            if (start_date > end_date):
                raise ValueError("start_date must not be later than end_date")

            return self.get_QoQ_data(
                ticker=ticker,
                start_year=start_date.year,
                start_season=self._season_of(start_date),
                end_year=end_date.year,
                end_season=self._season_of(end_date),
                report_type=report_type,
                indexes=indexes,
                use_cal=(fetch_mode == self.FetchMode.QOQ)
            )

        elif (fetch_mode in {
                self.FetchMode.YOY,
                self.FetchMode.YOY_NOCAL
            }
        ):
            start_year = self._to_datetime(start_date).year if start_date else 2005

            if (not end_date):
                end_date = self.get_latest_data_time(ticker)
            if (not end_date):
                end_date = datetime.today()
            end_date = self._to_datetime(end_date)

            # NOTE(review): growth calculation is not enabled for
            # FetchMode.YOY. cal_YoY looks rows up by str(year) while the
            # rows built in get_YoY_data are labelled "<year>Q<season>", so
            # passing use_cal=True here would raise KeyError until the two
            # are aligned. Both YOY modes therefore return raw values.
            return self.get_YoY_data(
                ticker = ticker,
                start_year = start_year,
                end_year = end_date.year,
                season = self._season_of(end_date),
                report_type = report_type,
                indexes = indexes
            )

        return None

    def get_QoQ_data(
        self,
        ticker,
        start_year,
        start_season,
        end_year,
        end_season,
        report_type = "Q",
        indexes = None,
        use_cal = False
    ):
        """Return every quarter inside the given (year, season) range.

        Args:
            ticker: stock ticker.
            start_year, start_season: inclusive lower bound.
            end_year, end_season: inclusive upper bound.
            report_type: report sub-document to read.
            indexes: column names to return; empty/None returns all.
            use_cal: when true, replace each value with
                ``{"value", "growth"}`` via ``cal_QoQ``.

        Returns:
            pandas.DataFrame with one column per ``<year>Q<season>``.
        """
        pipeline = [
            { "$match": { "ticker": ticker } },
            { "$unwind": "$data" },
            { "$match": self._quarter_range_match(
                start_year, start_season, end_year, end_season
            )},
            { "$project": self._project_stage(report_type, indexes) }
        ]

        fetched_data = list(self.collection.aggregate(pipeline))

        data_dict = StatsProcessor.list_of_dict_to_dict(
            fetched_data,
            keys = ["year", "season"],
            delimeter = "Q",
            data_key=report_type
        )
        if (use_cal):
            data_dict = self.cal_QoQ(data_dict)

        return pd.DataFrame.from_dict(data_dict)

    def get_YoY_data(
        self,
        ticker,
        start_year,
        end_year,
        season,
        report_type = "Q",
        indexes = None,
        use_cal = False
    ):
        """Return one given season for every year in the range.

        Args:
            ticker: stock ticker.
            start_year, end_year: inclusive year range.
            season: quarter (1-4) to select in each year.
            report_type: report sub-document to read.
            indexes: column names to return; empty/None returns all.
            use_cal: when true, also fetch the years 1/3/5/10 before each
                year in range and run ``cal_YoY`` on the result.

        Returns:
            pandas.DataFrame built from the selected rows.
        """
        if (use_cal):
            # Growth needs the comparison years in addition to the range.
            select_year = set()
            for year in range(start_year, end_year + 1):
                select_year.update(
                    {year, year - 1, year - 3, year - 5, year - 10}
                )
            select_year = sorted(select_year, reverse=True)
        else:
            select_year = list(range(start_year, end_year + 1))

        pipeline = [
            { "$match": { "ticker": ticker } },
            { "$unwind": "$data" },
            { "$match": {
                "$and": [
                    { "data.year": {"$in": select_year}},
                    { "data.season": {"$eq": season}}
                ]
            }},
            { "$project": self._project_stage(report_type, indexes) }
        ]

        fetched_data = list(self.collection.aggregate(pipeline))

        data_dict = StatsProcessor.list_of_dict_to_dict(
            fetched_data,
            keys = ['year', 'season'],
            data_key=report_type,
            delimeter='Q'
        )
        if (use_cal):
            data_dict = self.cal_YoY(data_dict, start_year, end_year)

        return pd.DataFrame.from_dict(data_dict)
@@ -1,4 +1,5 @@
1
1
  from .datetime import StatsDateTime
2
2
  from .db_client import DBClient
3
3
  from .data_process import StatsProcessor
4
- from .fetcher import StatsFetcher
4
+ from .fetcher import StatsFetcher
5
+ from .calculate_value import YoY_Calculator
@@ -0,0 +1,26 @@
1
class YoY_Calculator:
    """Growth-rate helper used for year-over-year and quarter-over-quarter figures."""

    def __init__(self):
        pass

    @classmethod
    def cal_growth(cls, target_value: float, past_value: float, delta: int):
        """Return the growth rate from ``past_value`` to ``target_value``.

        Args:
            target_value: value at the current period.
            past_value: value at the earlier period.
            delta: number of periods (years/quarters) between the two
                values. When ``delta > 1`` the annualized (compound) rate
                ``(target / past) ** (1 / delta) - 1`` is returned,
                otherwise the simple rate ``(target - past) / past``.

        Returns:
            The growth rate, or ``None`` when it cannot be computed: the
            past value is zero, an operand is not numeric, the result
            overflows, or the compound rate is a complex number (negative
            ratio raised to a fractional power).
        """
        try:
            if (delta > 1):
                growth = ((target_value / past_value)**(1 / delta)) - 1
            else:
                growth = ((target_value - past_value) / past_value)
        except (ZeroDivisionError, TypeError, ValueError, OverflowError):
            # Only arithmetic failures mean "no growth figure"; anything
            # else is a programming error and should surface.
            return None

        if (isinstance(growth, complex)):
            return None

        return growth
@@ -20,7 +20,6 @@ class StatsProcessor:
20
20
  """
21
21
  1. 讀檔: txt / yaml
22
22
  2. 將巢狀dictionary / DataFrame扁平化
23
-
24
23
  """
25
24
 
26
25
  @classmethod
@@ -215,3 +214,59 @@ class StatsProcessor:
215
214
  return int(np.round(value).item())
216
215
  else:
217
216
  return value
217
+
218
@classmethod
def list_of_dict_to_dict(
    cls,
    data_list: list,
    key: str = "",
    keys: list = None,
    delimeter: str = "_",
    data_key: str = "Q"
):
    """Re-key a list of TEJ records into a dict (List[Dict] -> Dict[Dict]).

    Args:
        data_list: records shaped like
            ``{"data": {"year": 2021, "season": 1, "Q": {...}, ...}}``.
            The records are read only; they are not modified.
        key: single field of ``record["data"]`` used as the output key.
            Takes precedence over ``keys`` when both are given.
        keys: several fields of ``record["data"]`` whose string values are
            joined with ``delimeter`` to form the output key.
        delimeter: separator used when joining several key fields.
        data_key: field of ``record["data"]`` whose content becomes the
            output value.

    Returns:
        ``{"2021": {...}}`` for ``key="year"``, or ``{"2021Q2": {...}}``
        for ``keys=["year", "season"], delimeter="Q"``. Later records
        overwrite earlier ones that produce the same key.

    Raises:
        ValueError: if neither ``key`` nor ``keys`` is given.
        KeyError: if a record lacks ``"data"``, one of the key fields, or
            ``data_key``.
    """
    if not (key or keys):
        raise ValueError(
            "func list_of_dict_to_dict must have argument \"key\" or \"keys\""
        )

    index_fields = [key] if key else list(keys)

    return_dict = {}
    for item in data_list:
        record = item['data']

        missing = [field for field in index_fields if field not in record]
        if missing:
            raise KeyError(f"record is missing key field(s): {missing}")

        label = delimeter.join(str(record[field]) for field in index_fields)
        return_dict[label] = record[data_key]

    return return_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: neurostats_API
3
- Version: 0.0.14
2
+ Name: neurostats-API
3
+ Version: 0.0.15
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
@@ -83,7 +83,7 @@ pip install neurostats-API
83
83
  ```Python
84
84
  >>> import neurostats_API
85
85
  >>> print(neurostats_API.__version__)
86
- 0.0.14
86
+ 0.0.15
87
87
  ```
88
88
 
89
89
  ### 得到最新一期的評價資料與歷年評價
@@ -667,7 +667,66 @@ fetcher.query()
667
667
 
668
668
  請注意`range`, `last_range`, `52week_range`這三個項目型態為字串,其餘為float
669
669
 
670
+
671
+ ## TEJ 相關
672
+ ### 會計師簽證財務資料
673
+ ```Python
674
+ from neurostats_API import FinanceReportFetcher
675
+
676
+ mongo_uri = <MongoDB 的 URI>
677
+ db_name = 'company' # 連接的DB名稱
678
+ collection_name = "TWN/AINVFQ1" # 連接的collection對象
679
+
680
+ fetcher = FinanceReportFetcher(
681
+ mongo_uri = mongo_uri,
682
+ db_name = db_name,
683
+ collection_name = collection_name
684
+ )
685
+
686
+ data = fetcher.get(
687
+ ticker = "2330", # 任意的股票代碼
688
+ fetch_mode = fetcher.FetchMode.YOY_NOCAL, # 取得模式
689
+ start_date = "2005-01-01",
690
+ end_date = "2024-12-31",
691
+ report_type = "Q",
692
+ indexes = []
693
+ )
694
+ ```
695
+ - `ticker`: 股票代碼
696
+
697
+ - `fetch_mode` : 取得模式,為`fetcher.FetchMode.YOY_NOCAL` 或 `fetcher.FetchMode.QOQ_NOCAL`
698
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date為準同季的歷年資料,時間範圍以start_date為準
699
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
700
+
701
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
702
+
703
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
704
+
705
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
706
+
707
+ - `report_type`: 選擇哪種報告,預設為`Q`
708
+ - `A`: 當年累計
709
+ - `Q`: 當季數值
710
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
711
+
712
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
713
+ - 範例輸入: `['bp41', 'bp51']`
714
+
715
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
716
+ 請看 `會計師簽證財務資料`
717
+
718
+ #### 回傳資料
719
+ fetch_mode設定為`YOY_NOCAL`與`QOQ_NOCAL`下
720
+ 為回傳pd.DataFrame,column名稱為<年份>Q<季>, row名稱為指定財報項目
721
+
670
722
  ## 版本紀錄
723
+ ## 0.0.15
724
+ - TechFetcher中新增指數條件
725
+
726
+ - 新增tej_fetcher索取TEJ相關的資料
727
+
728
+ - package新增dependency,可以安裝需要的相關package
729
+
671
730
  ## 0.0.14
672
731
  - 修改部分財報資料錯誤的乘以1000的問題
673
732
 
@@ -7,6 +7,7 @@ neurostats_API/main.py
7
7
  neurostats_API.egg-info/PKG-INFO
8
8
  neurostats_API.egg-info/SOURCES.txt
9
9
  neurostats_API.egg-info/dependency_links.txt
10
+ neurostats_API.egg-info/requires.txt
10
11
  neurostats_API.egg-info/top_level.txt
11
12
  neurostats_API/fetchers/__init__.py
12
13
  neurostats_API/fetchers/balance_sheet.py
@@ -18,6 +19,7 @@ neurostats_API/fetchers/margin_trading.py
18
19
  neurostats_API/fetchers/month_revenue.py
19
20
  neurostats_API/fetchers/profit_lose.py
20
21
  neurostats_API/fetchers/tech.py
22
+ neurostats_API/fetchers/tej_finance_report.py
21
23
  neurostats_API/fetchers/value_invest.py
22
24
  neurostats_API/tools/balance_sheet.yaml
23
25
  neurostats_API/tools/cash_flow_percentage.yaml
@@ -25,8 +27,10 @@ neurostats_API/tools/finance_overview_dict.yaml
25
27
  neurostats_API/tools/profit_lose.yaml
26
28
  neurostats_API/tools/seasonal_data_field_dict.txt
27
29
  neurostats_API/utils/__init__.py
30
+ neurostats_API/utils/calculate_value.py
28
31
  neurostats_API/utils/data_process.py
29
32
  neurostats_API/utils/datetime.py
30
33
  neurostats_API/utils/db_client.py
31
34
  neurostats_API/utils/fetcher.py
32
- test/test_fetchers.py
35
+ test/test_fetchers.py
36
+ test/test_tej.py
@@ -0,0 +1,6 @@
1
+ numpy>=2.1.0
2
+ pandas>=2.2.0
3
+ pymongo
4
+ pytz
5
+ python-dotenv
6
+ yfinance
@@ -2,10 +2,16 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='neurostats_API',
5
- version='0.0.14',
5
+ version='0.0.15',
6
6
  long_description=open('README.md', 'r', encoding='utf-8').read(),
7
7
  long_description_content_type='text/markdown',
8
8
  install_requires=[
9
+ "numpy>=2.1.0",
10
+ "pandas>=2.2.0",
11
+ "pymongo",
12
+ "pytz",
13
+ "python-dotenv",
14
+ "yfinance"
9
15
  ],
10
16
  author='JasonWang@Neurowatt',
11
17
  packages=find_packages(exclude=['.venv', 'test*', 'data_in_db', '.pytest_cache']),
@@ -0,0 +1,26 @@
1
"""Integration tests for FinanceReportFetcher against a live MongoDB."""
import os

import pytest

# The connection string is read from the environment so that credentials are
# never committed or shipped in the sdist.
# NOTE(review): the URI previously hard-coded here contained a password and
# was published with the package; that password should be rotated.
mongo_uri = os.environ.get("MONGO_URI")
ticker = "2330"

# Without a database to talk to there is nothing to test.
pytestmark = pytest.mark.skipif(
    not mongo_uri,
    reason="MONGO_URI is not set; skipping live MongoDB tests"
)


def test_QoQ():
    """Fetch every 2024 quarter for two columns without growth calculation."""
    from neurostats_API import FinanceReportFetcher

    fetcher = FinanceReportFetcher(mongo_uri)

    data = fetcher.get(
        ticker,
        fetch_mode = fetcher.FetchMode.QOQ_NOCAL,
        start_date="2024-01-01",
        end_date="2024-12-31",
        indexes = ['bp41', 'bp51']
    )

    print(data)


def test_YoY():
    """Fetch the latest season across years; 'arv' is not a known column."""
    from neurostats_API import FinanceReportFetcher
    fetcher = FinanceReportFetcher(mongo_uri)

    data = fetcher.get(ticker, fetcher.FetchMode.YOY_NOCAL, indexes = ['bp41', 'bp51', 'arv'])
    print (data)
@@ -1 +0,0 @@
1
- __version__='0.0.14'
@@ -1,54 +0,0 @@
1
- from pymongo import MongoClient
2
- import pandas as pd
3
- import json
4
- import pytz
5
- from datetime import datetime, timedelta, date
6
- from ..utils import StatsDateTime, StatsProcessor
7
- import yaml
8
-
9
- class StatsFetcher:
10
- def __init__(self, ticker, db_client):
11
- self.ticker = ticker
12
- self.db = db_client[
13
- "company"] # Replace with your database name
14
- self.collection = self.db["twse_stats"]
15
-
16
- self.timezone = pytz.timezone("Asia/Taipei")
17
-
18
- self.target_metric_dict = {
19
- 'value': ['value'],
20
- 'value_and_percentage': ['value', 'percentage'],
21
- 'percentage': ['percentage'],
22
- 'grand_total': ['grand_total'],
23
- 'grand_total_values': ['grand_total', 'grand_total_percentage'],
24
- 'grand_total_percentage': ['grand_total_percentage'],
25
- 'growth': [f'YoY_{i}' for i in [1, 3, 5, 10]],
26
- 'grand_total_growth': [f"YoY_{i}" for i in [1, 3, 5, 10]]
27
- }
28
-
29
-
30
- def prepare_query(self):
31
- return [
32
- {
33
- "$match": {
34
- "ticker": self.ticker,
35
- }
36
- },
37
- ]
38
-
39
- def collect_data(self, start_date, end_date):
40
- pipeline = self.prepare_query(start_date, end_date)
41
-
42
- fetched_data = list(self.collection.aggregate(pipeline))
43
-
44
- return fetched_data[0]
45
-
46
- def str_to_datetime(self, date_str):
47
- year, month, day = [int(num) for num in date_str.split("-")]
48
-
49
- date = datetime.strptime(date_str, "%Y-%m-%d")
50
- date = self.timezone.localize(date)
51
-
52
- season = (month - 1) // 3 + 1
53
-
54
- return StatsDateTime(date, year, month, day, season)