neurostats-API 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- __version__='0.0.16'
1
+ __version__='0.0.18'
2
2
 
3
3
  from .fetchers import (
4
4
  BalanceSheetFetcher,
@@ -53,6 +53,23 @@ class StatsFetcher:
53
53
  season = (month - 1) // 3 + 1
54
54
 
55
55
  return StatsDateTime(date, year, month, day, season)
56
+
57
+ def has_required_columns(self, df:pd.DataFrame, required_cols=None):
58
+ """
59
+ Check if the required columns are present in the DataFrame.
60
+
61
+ Args:
62
+ df (pd.DataFrame): The DataFrame to check.
63
+ required_cols (list, optional): List of required column names.
64
+ Defaults to ['date', 'open', 'high', 'low', 'close', 'volume'].
65
+
66
+ Returns:
67
+ bool: True if all required columns are present, False otherwise.
68
+ """
69
+ if required_cols is None:
70
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
71
+
72
+ return all(col in df.columns for col in required_cols)
56
73
 
57
74
 
58
75
  class BaseTEJFetcher(abc.ABC):
@@ -80,7 +97,11 @@ class BaseTEJFetcher(abc.ABC):
80
97
  year_shifts = [1, 3, 5, 10]
81
98
  return_dict = {}
82
99
  for year in range(start_year, end_year + 1):
83
- year_data = data_dict[f"{year}Q{season}"]
100
+ try:
101
+ year_data = data_dict[f"{year}Q{season}"].copy()
102
+ except KeyError as e:
103
+ continue
104
+
84
105
  year_keys = list(year_data.keys())
85
106
  for key in year_keys:
86
107
  if (key in 'season'):
@@ -92,7 +113,7 @@ class BaseTEJFetcher(abc.ABC):
92
113
  for shift in year_shifts:
93
114
  this_value = year_data[key]
94
115
  try:
95
- past_year = str(year - shift)
116
+ past_year = year - shift
96
117
  last_value = data_dict[f"{past_year}Q{season}"][key]
97
118
  temp_dict[f"YoY_{shift}"] = YoY_Calculator.cal_growth(this_value, last_value, delta=shift)
98
119
  except Exception as e:
@@ -47,40 +47,46 @@ class TechFetcher(StatsFetcher):
47
47
  )
48
48
 
49
49
  def _get_ohlcv(self):
50
-
51
- if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
52
-
53
- full_tick = f'^{self.ticker}'
54
- yf_ticker = yf.Ticker(full_tick)
55
- origin_df = yf_ticker.history(period="10y")
56
- origin_df = origin_df.reset_index()
57
- origin_df["Date"] = pd.to_datetime(origin_df["Date"]).dt.date
58
- df = origin_df.rename(
59
- columns={
60
- "Date": "date",
61
- "Open": "open",
62
- "High": "high",
63
- "Low": "low",
64
- "Close": "close",
65
- "Volume": "volume"
66
- }
67
- )
68
- else:
69
50
 
51
+ required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
52
+
53
+ try:
70
54
  query = {'ticker': self.ticker}
71
- ticker_full = list(self.collection.find(query))
55
+ ticker_full = self.collection.find_one(query)
72
56
 
73
57
  if not ticker_full:
74
58
  raise ValueError(f"No data found for ticker: {self.ticker}")
75
59
 
76
- if 'daily_data' not in ticker_full[0] or ticker_full[0]['daily_data'] is None:
77
- raise KeyError("Missing 'daily_data' in the retrieved data")
60
+ daily_data = ticker_full.get("daily_data", [])
61
+ if not isinstance(daily_data, list):
62
+ raise TypeError("Expected 'daily_data' to be a list.")
63
+
64
+ df = pd.DataFrame(daily_data)
78
65
 
79
- df = pd.DataFrame(ticker_full[0]['daily_data'])
66
+ if not self.has_required_columns(df, required_cols):
67
+ raise KeyError(f"Missing required columns")
80
68
 
81
- selected_cols = ['date','open','high','low','close','volume']
69
+ except (KeyError, ValueError, TypeError) as e:
70
+
71
+ print(f"Conduct yf searching")
72
+
73
+ if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
74
+ full_tick = f'^{self.ticker}'
75
+ else:
76
+ full_tick = f'{self.ticker}.tw'
77
+
78
+ df = self.conduct_yf_search(full_tick)
79
+
80
+ if not self.has_required_columns(df, required_cols):
81
+
82
+ print(f".tw failed, try .two")
83
+
84
+ full_tick = f'{self.ticker}.two'
85
+
86
+ df = self.conduct_yf_search(full_tick)
87
+
88
+ return df[required_cols]
82
89
 
83
- return df[selected_cols]
84
90
 
85
91
  def get_daily(self):
86
92
 
@@ -101,6 +107,29 @@ class TechFetcher(StatsFetcher):
101
107
  def get_yearly(self):
102
108
 
103
109
  return self.yearly_index
110
+
111
+ def conduct_yf_search(self, ticker:str):
112
+
113
+ yf_ticker = yf.Ticker(ticker)
114
+ origin_df = yf_ticker.history(period="10y")
115
+
116
+ if origin_df.empty:
117
+ return origin_df
118
+
119
+ origin_df = origin_df.reset_index()
120
+ origin_df["Date"] = pd.to_datetime(origin_df["Date"])
121
+ df = origin_df.rename(
122
+ columns={
123
+ "Date": "date",
124
+ "Open": "open",
125
+ "High": "high",
126
+ "Low": "low",
127
+ "Close": "close",
128
+ "Volume": "volume"
129
+ }
130
+ )
131
+
132
+ return df
104
133
 
105
134
  class TechProcessor:
106
135
 
@@ -5,6 +5,7 @@ import pandas as pd
5
5
  from pymongo import MongoClient
6
6
  from ..utils import StatsProcessor, YoY_Calculator
7
7
  import warnings
8
+ import yaml
8
9
 
9
10
 
10
11
  class FinanceReportFetcher(BaseTEJFetcher):
@@ -20,18 +21,8 @@ class FinanceReportFetcher(BaseTEJFetcher):
20
21
  self.db = self.client[db_name]
21
22
  self.collection = self.db[collection_name]
22
23
 
23
- # yapf: disabled
24
- self.check_index = {
25
- 'coid', 'mdate', 'key3', 'no', 'sem', 'merg', 'curr', 'annd', 'fin_ind', 'bp11', 'bp21', 'bp22', 'bp31',
26
- 'bp41', 'bp51', 'bp53', 'bp61', 'bp62', 'bp63', 'bp64', 'bp65', 'bf11', 'bf12', 'bf21', 'bf22', 'bf41',
27
- 'bf42', 'bf43', 'bf44', 'bf45', 'bf99', 'bsca', 'bsnca', 'bsta', 'bscl', 'bsncl', 'bstl', 'bsse', 'bslse',
28
- 'debt', 'quick', 'ppe', 'ar', 'ip12', 'ip22', 'ip31', 'ip51', 'iv41', 'if11', 'isibt', 'isni', 'isnip',
29
- 'eps', 'ispsd', 'gm', 'opi', 'nri', 'ri', 'nopi', 'ebit', 'cip31', 'cscfo', 'cscfi', 'cscff', 'person',
30
- 'shares', 'wavg', 'taxrate', 'r104', 'r115', 'r105', 'r106', 'r107', 'r108', 'r201', 'r112', 'r401', 'r402',
31
- 'r403', 'r404', 'r405', 'r408', 'r409', 'r410', 'r502', 'r501', 'r205', 'r505', 'r517', 'r512', 'r509',
32
- 'r608', 'r616', 'r610', 'r607', 'r613', 'r612', 'r609', 'r614', 'r611', 'r307', 'r304', 'r305', 'r306',
33
- 'r316', 'r834'
34
- } # yapf: enabled
24
+ index_dict = StatsProcessor.load_yaml("tej_db_index.yaml")
25
+ self.check_index = set(index_dict[collection_name])
35
26
 
36
27
  def get(
37
28
  self,
@@ -59,17 +50,19 @@ class FinanceReportFetcher(BaseTEJFetcher):
59
50
  if (difference):
60
51
  warnings.warn(f"{list(difference)} 沒有出現在資料表中,請確認column名稱是否正確", UserWarning)
61
52
 
53
+ if (not start_date):
54
+ start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
55
+ else:
56
+ start_date = datetime.strptime(start_date, "%Y-%m-%d")
57
+
62
58
  if (fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}):
63
- if (not start_date):
64
- warnings.warn("No start_date specified, use default date = \"2005-01-01\"", UserWarning)
65
- start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
59
+
66
60
  if (not end_date):
67
- warnings.warn("No end_date specified, use default date = today", UserWarning)
68
61
  end_date = datetime.today()
62
+ else:
63
+ end_date = datetime.strptime(end_date, "%Y-%m-%d")
69
64
 
70
65
  assert (start_date <= end_date)
71
- start_date = datetime.strptime(start_date, "%Y-%m-%d")
72
- end_date = datetime.strptime(end_date, "%Y-%m-%d")
73
66
 
74
67
  start_year = start_date.year
75
68
  start_season = (start_date.month - 1) // 4 + 1
@@ -94,7 +87,6 @@ class FinanceReportFetcher(BaseTEJFetcher):
94
87
  return data_df
95
88
 
96
89
  elif (fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}):
97
- start_date = datetime.strptime(start_date, "%Y-%m-%d")
98
90
  start_year = start_date.year
99
91
  end_date = self.get_latest_data_time(ticker)
100
92
  if (not end_date):
@@ -225,7 +217,6 @@ class FinanceReportFetcher(BaseTEJFetcher):
225
217
  ]
226
218
 
227
219
  fetched_data = self.collection.aggregate(pipeline).to_list()
228
-
229
220
  data_dict = StatsProcessor.list_of_dict_to_dict(
230
221
  fetched_data, keys=["year", "season"], delimeter="Q", data_key=report_type)
231
222
 
@@ -0,0 +1,135 @@
1
+ TWN/AINVFQ1:
2
+ - coid
3
+ - mdate
4
+ - key3
5
+ - 'no'
6
+ - sem
7
+ - merg
8
+ - curr
9
+ - annd
10
+ - fin_ind
11
+ - bp11
12
+ - bp21
13
+ - bp22
14
+ - bp31
15
+ - bp41
16
+ - bp51
17
+ - bp53
18
+ - bp61
19
+ - bp62
20
+ - bp63
21
+ - bp64
22
+ - bp65
23
+ - bf11
24
+ - bf12
25
+ - bf21
26
+ - bf22
27
+ - bf41
28
+ - bf42
29
+ - bf43
30
+ - bf44
31
+ - bf45
32
+ - bf99
33
+ - bsca
34
+ - bsnca
35
+ - bsta
36
+ - bscl
37
+ - bsncl
38
+ - bstl
39
+ - bsse
40
+ - bslse
41
+ - debt
42
+ - quick
43
+ - ppe
44
+ - ar
45
+ - ip12
46
+ - ip22
47
+ - ip31
48
+ - ip51
49
+ - iv41
50
+ - if11
51
+ - isibt
52
+ - isni
53
+ - isnip
54
+ - eps
55
+ - ispsd
56
+ - gm
57
+ - opi
58
+ - nri
59
+ - ri
60
+ - nopi
61
+ - ebit
62
+ - cip31
63
+ - cscfo
64
+ - cscfi
65
+ - cscff
66
+ - person
67
+ - shares
68
+ - wavg
69
+ - taxrate
70
+ - r104
71
+ - r115
72
+ - r105
73
+ - r106
74
+ - r107
75
+ - r108
76
+ - r201
77
+ - r112
78
+ - r401
79
+ - r402
80
+ - r403
81
+ - r404
82
+ - r405
83
+ - r408
84
+ - r409
85
+ - r410
86
+ - r502
87
+ - r501
88
+ - r205
89
+ - r505
90
+ - r517
91
+ - r512
92
+ - r509
93
+ - r608
94
+ - r616
95
+ - r610
96
+ - r607
97
+ - r613
98
+ - r612
99
+ - r609
100
+ - r614
101
+ - r611
102
+ - r307
103
+ - r304
104
+ - r305
105
+ - r306
106
+ - r316
107
+ - r834
108
+ TWN/AFESTM1:
109
+ - coid
110
+ - mdate
111
+ - key3
112
+ - 'no'
113
+ - sem
114
+ - merg
115
+ - curr
116
+ - annd
117
+ - fin_ind
118
+ - ip12
119
+ - gm
120
+ - opi
121
+ - isibt
122
+ - isni
123
+ - isnip
124
+ - r306
125
+ - r316
126
+ - eps
127
+ - r105
128
+ - r106
129
+ - r107
130
+ - r108
131
+ - r401
132
+ - r402
133
+ - r403
134
+ - r404
135
+ - r405
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neurostats_API
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: The service of NeuroStats website
5
5
  Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
6
6
  Author: JasonWang@Neurowatt
@@ -774,43 +774,48 @@ YoY_10 1.420500e-01 1.586797e-01 1.551364e-01
774
774
  }
775
775
  ```
776
776
 
777
+ ### 公司自結資料
778
+ ```Python
779
+ from neurostats_API import FinanceReportFetcher
777
780
 
778
- ## 版本紀錄
779
- ## 0.0.16
780
- - 處理ValueFetcher的error #issue76
781
-
782
- - tej_fetcher新增 QOQ, YOY功能
783
-
784
- ## 0.0.15
785
- - TechFetcher中新增指數條件
786
-
787
- - 新增tej_fetcher索取TEJ相關的資料
788
-
789
- - package新增depensnecy,可以安裝需要的相關package
781
+ fetcher = FinanceReportFetcher(
782
+ mongo_uri = mongo_uri,
783
+ db_name = db_name,
784
+ collection_name = collection_name
785
+ )
790
786
 
791
- ## 0.0.14
792
- - 修改部分財報資料錯誤的乘以1000的問題
787
+ data = fetcher.get(
788
+ ticker = "2330", # 任意的股票代碼
789
+ fetch_mode = fetcher.FetchMode.QOQ_NOCAL, # 取得模式
790
+ start_date = "2005-01-01",
791
+ end_date = "2024-12-31",
792
+ report_type = "Q",
793
+ indexes = []
794
+ ) # -> pd.DataFrame or Dict[pd.DataFrame]
795
+ ```
796
+ - `ticker`: 股票代碼
793
797
 
794
- - 新增例外處理: 若資料庫對於季資料一部分index缺失的情況下仍會盡可能去將資料蒐集並呈現
798
+ - `fetch_mode` : 取得模式,為 `fetcher.FetchMode.YOY_NOCAL`、`fetcher.FetchMode.QOQ_NOCAL`、`fetcher.FetchMode.QOQ` 或 `fetcher.FetchMode.YOY`
799
+ - `YOY_NOCAL`: 以end_date為準,取得與end_date同季的歷年資料,時間範圍以start_date為起始
800
+ > 例如`start_date = "2020-07-01"`, `end_date = "2024-01-01"`,會回傳2020~2024的第一季資料
795
801
 
796
- ### 0.0.13
797
- - value_fetcher 新增獲得一序列評價的功能
802
+ - `QOQ_NOCAL`: 時間範圍內的每季資料
798
803
 
799
- ### 0.0.12
800
- - 新增資券變化(margin trading)
804
+ - `QOQ`: 時間範圍內每季的每個index的數值以及QoQ
801
805
 
802
- - 修改法人買賣(institution_trading)的query方式
806
+ - `YOY`: 以end_date為準,取得與end_date同季的歷年資料以及成長率,時間範圍以start_date為起始
803
807
 
804
- ### 0.0.11
805
- - 修復財務分析的千元計算問題
808
+ - `start_date`: 開始日期,不設定時預設為`2005-01-01`
806
809
 
807
- - 籌碼面新增法人買賣(institution_trading)
810
+ - `end_date`: 結束日期,不設定時預設為資料庫最新資料的日期
808
811
 
809
- - 將財報三表與月營收的資料型態與數值做轉換(%轉字串, 千元乘以1000)
810
- ### 0.0.10
811
- - 更新指標的資料型態: 單位為千元乘以1000之後回傳整數
812
+ - `report_type`: 選擇哪種報告,預設為`Q`
813
+ - `A`: 當年累計
814
+ - `Q`: 當季數值
815
+ - `TTM`: 移動四季 (包括當季在內,往前累計四個季度)
812
816
 
813
- - 處理銀行公司在finanace_overview會報錯誤的問題(未完全解決,因銀行公司財報有許多名稱不同,目前都會顯示為None)
817
+ - `indexes`: 選擇的column,需要以TEJ提供的欄位名稱為準,不提供時或提供`[]`會回傳全部column
818
+ - 範例輸入: `['bp41', 'bp51']`
814
819
 
815
- ### 0.0.9
816
- - 更新指標的資料型態: 單位為日, %, 倍轉為字串
820
+ [TEJ資料集連結](https://tquant.tejwin.com/%E8%B3%87%E6%96%99%E9%9B%86/)
821
+ 請看 `公司自結數`
@@ -1,30 +1,31 @@
1
- neurostats_API/__init__.py,sha256=5ToELVqNOIdVJrMj5G8JvbyRIjvo1FxcP6e-a-iMe1Y,261
1
+ neurostats_API/__init__.py,sha256=S8SovSxsgsbHC7Ddhoka7I5juDmObJGSSX-As0KVcZE,261
2
2
  neurostats_API/cli.py,sha256=UJSWLIw03P24p-gkBb6JSEI5dW5U12UvLf1L8HjQD-o,873
3
3
  neurostats_API/main.py,sha256=QcsfmWivg2Dnqw3MTJWiI0QvEiRs0VuH-BjwQHFCv00,677
4
4
  neurostats_API/fetchers/__init__.py,sha256=B4aBwVzf_X-YieEf3fZteU0qmBPVIB9VjrmkyWhLK18,489
5
5
  neurostats_API/fetchers/balance_sheet.py,sha256=sQv4Gk5uoKURLEdh57YknOQWiyVwaXJ2Mw75jxNqUS0,5804
6
- neurostats_API/fetchers/base.py,sha256=rcrKW2PTJYfBsxsiGpCYiVTK9pQW4aALYLixIjvNMUk,4890
6
+ neurostats_API/fetchers/base.py,sha256=AgSqjxSwmYGxhn8HaFcl-yyAE7r59l4FcFGhWSEitMA,5677
7
7
  neurostats_API/fetchers/cash_flow.py,sha256=TY7VAWVXkj5-mzH5Iu0sIE-oV8MvGmmDy0URNotNV1E,7614
8
8
  neurostats_API/fetchers/finance_overview.py,sha256=PxUdWY0x030olYMLcCHDBn068JLmCE2RTOce1dxs5vM,27753
9
9
  neurostats_API/fetchers/institution.py,sha256=UrcBc6t7u7CnEwUsf6YmLbbJ8VncdWpq8bCz17q2dgs,11168
10
10
  neurostats_API/fetchers/margin_trading.py,sha256=lQImtNdvaBoSlKhJvQ3DkH3HjSSgKRJz4ZZpyR5-Z4I,10433
11
11
  neurostats_API/fetchers/month_revenue.py,sha256=nixX2llzjCFr2m2YVjxrSfkBusnZPrPb2dRDq1XLGhw,4251
12
12
  neurostats_API/fetchers/profit_lose.py,sha256=EN9Y0iamcAaHMZdjHXO6b_2buLnORssf8ZS7A0hi74s,5896
13
- neurostats_API/fetchers/tech.py,sha256=Hol1bcwJ_ERcnoTXNWlqqaWOuzdl7MeiAjCvzQMZDTg,12269
14
- neurostats_API/fetchers/tej_finance_report.py,sha256=laXph2ca1LCFocZjjdvtzmm5fcUecHk2Gs5h6-XMSWY,12967
13
+ neurostats_API/fetchers/tech.py,sha256=8U6kn7cvWJsmKIMn_f2l6U9H_NBy_OwOXlS26XhFIv0,12926
14
+ neurostats_API/fetchers/tej_finance_report.py,sha256=O_mqrMY_A1pGR5geq1fS4Zm1AREKHzC2hKBEk4gA8hs,11822
15
15
  neurostats_API/fetchers/value_invest.py,sha256=b_x2Dpgs8VBU5HdG8ocKtfIEkqhU-Q0S5n6RxuFuM2g,7467
16
16
  neurostats_API/tools/balance_sheet.yaml,sha256=6XygNG_Ybb1Xkk1e39LMLKr7ATvaCP3xxuwFbgNl6dA,673
17
17
  neurostats_API/tools/cash_flow_percentage.yaml,sha256=fk2Z4eb1JjGFvP134eJatHacB7BgTkBenhDJr83w8RE,1345
18
18
  neurostats_API/tools/finance_overview_dict.yaml,sha256=B9nV75StXkrF3yv2-eezzitlJ38eEK86RD_VY6588gQ,2884
19
19
  neurostats_API/tools/profit_lose.yaml,sha256=iyp9asYJ04vAxk_HBUDse_IBy5oVvYHpwsyACg5YEeg,3029
20
20
  neurostats_API/tools/seasonal_data_field_dict.txt,sha256=X8yc_el6p8BH_3FikTqBVFGsvWdXT6MHXLfKfi44334,8491
21
+ neurostats_API/tools/tej_db_index.yaml,sha256=lu-cmbB6dhx0eUlBSkyzXWqPKlwRtEvqlMTAh2y0oHs,969
21
22
  neurostats_API/utils/__init__.py,sha256=0tJCRmlJq2aDwcNNW-oEaA9H0OxTJMFvjpVYtG4AvZU,186
22
23
  neurostats_API/utils/calculate_value.py,sha256=lUKSsWU76XRmDUcmi4eDjoQxjb3vWpAAKInF9w49VNI,782
23
24
  neurostats_API/utils/data_process.py,sha256=A--dzOsu42jRxqqCD41gTtjE5rhEBYmhB6y-AnCvo5U,8986
24
25
  neurostats_API/utils/datetime.py,sha256=XJya4G8b_-ZOaBbMXgQjWh2MC4wc-o6goQ7EQJQMWrQ,773
25
26
  neurostats_API/utils/db_client.py,sha256=OYe6yazcR4Aa6jYmy47JrryUeh2NnKGqY2K_lSZe6i8,455
26
27
  neurostats_API/utils/fetcher.py,sha256=VbrUhjA-GG5AyjPX2SHtFIbZM4dm3jo0RgZzuCbb_Io,40927
27
- neurostats_API-0.0.16.dist-info/METADATA,sha256=9US1mdwWnOCAnwfsOj-ZLRCfo07p3yd0UfwMKS6989g,29848
28
- neurostats_API-0.0.16.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
29
- neurostats_API-0.0.16.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
30
- neurostats_API-0.0.16.dist-info/RECORD,,
28
+ neurostats_API-0.0.18.dist-info/METADATA,sha256=S8Fn1OmtZ0kxsl2BXdqyy0T2KYISxkMJPvqzv2glgPM,30350
29
+ neurostats_API-0.0.18.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
30
+ neurostats_API-0.0.18.dist-info/top_level.txt,sha256=nSlQPMG0VtXivJyedp4Bkf86EOy2TpW10VGxolXrqnU,15
31
+ neurostats_API-0.0.18.dist-info/RECORD,,