PyPI - neurostats-API - Versions diffs - 0.0.21__tar.gz → 0.0.22__tar.gz - Mend

neurostats-API 0.0.21__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

neurostats_api-0.0.22/MANIFEST.in ADDED Viewed

@@ -0,0 +1,9 @@
+recursive-include neurostats_api/tools/tej_db *.yaml
+recursive-include neurostats_api/tools/tej_db *.txt
+recursive-include neurostats_api/tools/tej_db *.json
+recursive-include neurostats_api/tools/twse *.yaml
+recursive-include neurostats_api/tools/twse *.txt
+recursive-include neurostats_api/tools/twse *.json
+recursive-include neurostats_api/tools/company_list *.yaml
+recursive-include neurostats_api/tools/company_list *.txt
+recursive-include neurostats_api/tools/company_list *.json

{neurostats_api-0.0.21 → neurostats_api-0.0.22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: neurostats_API
-Version: 0.0.21
+Version: 0.0.22
 Summary: The service of NeuroStats website
 Home-page: https://github.com/NeurowattStats/NeuroStats_API.git
 Author: JasonWang@Neurowatt
@@ -8,7 +8,7 @@ Author-email: jason@neurowatt.ai
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 Requires-Dist: numpy
-Requires-Dist: pandas>=2.2.0
+Requires-Dist: pandas
 Requires-Dist: pymongo
 Requires-Dist: pytz
 Requires-Dist: python-dotenv
@@ -89,7 +89,7 @@ pip install neurostats-API
 ```Python
 >>> import neurostats_API
 >>> print(neurostats_API.__version__)
-0.0.21
+0.0.22
 ```
 ### 得到最新一期的評價資料與歷年評價

{neurostats_api-0.0.21 → neurostats_api-0.0.22}/README.md RENAMED Viewed

@@ -73,7 +73,7 @@ pip install neurostats-API
 ```Python
 >>> import neurostats_API
 >>> print(neurostats_API.__version__)
-0.0.21
+0.0.22
 ```
 ### 得到最新一期的評價資料與歷年評價

{neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__='0.0.20'
+__version__='0.0.22'
 from .fetchers import (
     BalanceSheetFetcher,

neurostats_api-0.0.22/neurostats_API/fetchers/balance_sheet.py ADDED Viewed

@@ -0,0 +1,178 @@
+from .base import StatsFetcher, StatsDateTime
+import json
+import pandas as pd
+from ..utils import StatsDateTime, StatsProcessor
+import yaml
+class BalanceSheetFetcher(StatsFetcher):
+    """
+    對應iFa.ai -> 財務分析 ->  資產負債表
+    """
+    def __init__(self, ticker, db_client):
+        super().__init__(ticker, db_client)
+        self.table_settings = StatsProcessor.load_yaml("twse/balance_sheet.yaml")
+        self.process_function_map = {
+            "twse_stats": self.process_data_twse,
+            "us_stats": self.process_data_us
+        }
+    def prepare_query(self):
+        pipeline = super().prepare_query()
+        pipeline = pipeline + [
+            {
+                "$unwind": "$seasonal_data"    # 展開 seasonal_data 陣列
+            },
+            {
+                "$project": {
+                    "_id": 0,
+                    "ticker": 1,
+                    "company_name": 1,
+                    "year": "$seasonal_data.year",
+                    "season": "$seasonal_data.season",
+                    "balance_sheet": {
+                        "$ifNull": ["$seasonal_data.balance_sheet", []]
+                    }    # 避免 null
+                }
+            },
+            {
+                "$sort": {
+                    "year": -1,
+                    "season": -1
+                }
+            }
+        ]
+        return pipeline
+    def collect_data(self):
+        return super().collect_data()
+    def query_data(self):
+        fetched_data = self.collect_data()
+        process_fn = self.process_function_map[self.collection_name]
+        processed_data = process_fn(fetched_data)
+        return processed_data
+    def process_data_twse(self, fetched_data):
+        latest_time = StatsDateTime.get_latest_time(
+            self.ticker, self.collection
+        ).get('last_update_time', {})
+        # 取最新時間資料時間，沒取到就預設去年年底
+        target_year = latest_time.get('seasonal_data', {}).get(
+            'latest_target_year',
+            StatsDateTime.get_today().year - 1
+        )
+        target_season = latest_time.get('seasonal_data',
+                                        {}).get('latest_season', 4)
+        return_dict = {
+            "ticker": self.ticker,
+            "company_name": fetched_data[-1]['company_name']
+        }
+        table_dict = {}
+        # 將value與percentage跟著年分季度一筆筆取出
+        for data in fetched_data:
+            year, season, balance_sheet = data['year'], data['season'], data[
+                'balance_sheet']
+            time_index = f"{year}Q{season}"
+            new_balance_sheet = dict()
+            # 蒐集整體的keys
+            index_names = list(balance_sheet.keys())
+            table_dict[time_index] = balance_sheet
+            # flatten dict:
+            # {<key>: {"value": <value>, "percentage": <value>}}
+            # -> {<key>_value: <value>, <key>_percentage:<value>}
+        old_balance_sheet = pd.DataFrame(table_dict)
+        target_season_col = old_balance_sheet.columns.str.endswith(f"Q{target_season}")
+        old_balance_sheet = old_balance_sheet.loc[:, target_season_col]
+        old_balance_sheet = StatsProcessor.expand_value_percentage(old_balance_sheet)
+        for time_index, data_dict in table_dict.items():
+            new_balance_sheet = self.flatten_dict(
+                balance_sheet,
+                indexes = index_names,
+                target_keys=["value", "percentage"]
+            )
+            table_dict[time_index] = new_balance_sheet
+        total_table = pd.DataFrame.from_dict(table_dict)
+        value_index = total_table.index.str.endswith("_value")
+        total_table.loc[value_index, :] = (
+            total_table.loc[value_index, :].apply(
+                lambda x: StatsProcessor.cal_non_percentage(x, postfix="元"),
+                axis=0
+            )
+        )
+        percenrage_index = total_table.index.str.endswith(
+            "_percentage"
+        )
+        total_table.loc[percenrage_index, :] = (
+            total_table.loc[percenrage_index, :].apply(
+                lambda x: StatsProcessor.
+                cal_non_percentage(x, to_str=True, postfix="%"),
+                axis=0
+            )
+        )
+        target_season_columns = total_table.columns.str.endswith(
+            f"Q{target_season}"
+        )
+        total_table_YoY = total_table.loc[:, target_season_columns]
+        for name, setting in self.table_settings.items():
+            target_indexes = setting.get('target_index', [None])
+            for target_index in target_indexes:
+                try:
+                    return_dict[name] = StatsProcessor.slice_old_table(
+                        total_table=old_balance_sheet,
+                        target_index=target_index
+                    )
+                    break
+                except Exception as e:
+                    print(f"error : {str(e)}")
+                    continue
+        return_dict.update(
+            {
+                "balance_sheet": old_balance_sheet,
+                "balance_sheet_all": total_table.copy(),
+                "balance_sheet_YoY": total_table_YoY
+            }
+        )
+        return return_dict
+    def process_data_us(self, fetched_data):
+        return_dict = {
+            "ticker": self.ticker,
+            "company_name": fetched_data[-1]['company_name']
+        }
+        table_dict = dict()
+        for data in fetched_data:
+            year, season, balance_sheet = data['year'], data['season'], data[
+                'balance_sheet']
+            table_dict[f"{year}Q{season}"] = balance_sheet
+        table_dict = pd.DataFrame.from_dict(table_dict)
+        return_dict["balance_sheet"] = table_dict
+        latest_season = fetched_data[0]['season']
+        target_season_columns = table_dict.columns.str.endswith(
+            f"Q{latest_season}"
+        )
+        table_dict_YoY = table_dict.loc[:, target_season_columns]
+        return_dict["balance_sheet_YoY"] = table_dict_YoY
+        return return_dict

{neurostats_api-0.0.21 → neurostats_api-0.0.22}/neurostats_API/fetchers/base.py RENAMED Viewed

@@ -1,22 +1,31 @@
 import abc
-from typing import Union
-from pymongo import MongoClient
-import pandas as pd
+from datetime import datetime, timedelta, date
 import json
+import pandas as pd
+from pymongo import MongoClient
 import pytz
-from datetime import datetime, timedelta, date
+from typing import Union
 from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
-import yaml
-class StatsFetcher:
+class StatsFetcher(abc.ABC):
-    def __init__(self, ticker, db_client):
+    def __init__(self, ticker: str, db_client: MongoClient):
         self.ticker = ticker
-        self.db = db_client["company"]    # Replace with your database name
-        self.collection = self.db["twse_stats"]
         self.timezone = pytz.timezone("Asia/Taipei")
+        self.tw_company_list = StatsProcessor.load_json("company_list/tw.json")
+        db_mapping = {
+            "company": "twse_stats",
+            "company_us": "us_stats",
+        }
+        name_mapping = {"company": "台股", "company_us": "美股"}
+        db_name = "company" if self.ticker in self.tw_company_list else "company_us"
+        self.db = db_client[db_name]
+        self.collection_name = db_mapping.get(db_name, "unknown")
+        assert self.collection_name != "unknown", f"請確認 {ticker} 是否是 {','.join(list(name_mapping.values()))}"
+        self.collection = db_client[db_name][self.collection_name]
         self.target_metric_dict = {
             'value': ['value'],
@@ -37,40 +46,41 @@ class StatsFetcher:
                 }
             },
         ]
+    def query_data(self):
+        return NotImplementedError()
-    def collect_data(self, start_date, end_date):
+    def collect_data(self):
         pipeline = self.prepare_query()
         fetched_data = list(self.collection.aggregate(pipeline))
+        return fetched_data if fetched_data else None
-        return fetched_data[0]
-    def str_to_datetime(self, date_str):
-        year, month, day = [int(num) for num in date_str.split("-")]
-        date = datetime.strptime(date_str, "%Y-%m-%d")
-        date = self.timezone.localize(date)
+    def str_to_datetime(self, date_str: str) -> StatsDateTime:
+        date = self.timezone.localize(datetime.strptime(date_str, "%Y-%m-%d"))
+        year, month, day = date.year, date.month, date.day
         season = (month - 1) // 3 + 1
         return StatsDateTime(date, year, month, day, season)
-    def has_required_columns(self, df: pd.DataFrame, required_cols=None):
-        """
-        Check if the required columns are present in the DataFrame.
-        Args:
-            df (pd.DataFrame): The DataFrame to check.
-            required_cols (list, optional): List of required column names.
-                                            Defaults to ['date', 'open', 'high', 'low', 'close', 'volume'].
-        Returns:
-            bool: True if all required columns are present, False otherwise.
-        """
+    def has_required_columns(
+        self, df: pd.DataFrame, required_cols=None
+    ) -> bool:
         if required_cols is None:
             required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
         return all(col in df.columns for col in required_cols)
+    @staticmethod
+    def flatten_dict(value_dict, indexes, target_keys):
+        indexes = value_dict.keys()
+        new_dict = {}
+        for key in indexes:
+            new_dict.update(
+                {
+                    f"{key}_{sub_key}": value_dict[key].get(sub_key, None)
+                    for sub_key in target_keys
+                }
+            )
+        return new_dict
 class BaseTEJFetcher(abc.ABC):
@@ -81,21 +91,14 @@ class BaseTEJFetcher(abc.ABC):
     def get_latest_data_time(self, ticker):
         latest_data = self.collection.find_one(
-            {
-                "ticker": ticker
-            },
-            {
+            {"ticker": ticker}, {
                 "last_update": 1,
                 "_id": 0
             }
         )
-        try:
-            latest_date = latest_data['last_update']["latest_data_date"]
-        except Exception as e:
-            latest_date = None
-        return latest_date
+        # return 得到最新日期或None
+        return latest_data.get('last_update', {}).get("latest_data_date", None)
     def process_value(self, value):
         if isinstance(value, str) and "%" in value:
@@ -107,77 +110,89 @@ class BaseTEJFetcher(abc.ABC):
     def calculate_growth(self, this_value, last_value, delta):
         try:
-            return YoY_Calculator.cal_growth(this_value, last_value, delta) * 100
+            return YoY_Calculator.cal_growth(
+                this_value, last_value, delta
+            ) * 100
         except Exception:
             return None
-    def cal_YoY(self, data_dict: dict, start_year: int, end_year: int, season: int):
+    def cal_YoY(
+        self, data_dict: dict, start_year: int, end_year: int, season: int
+    ):
         year_shifts = [1, 3, 5, 10]
         return_dict = {}
         for year in range(start_year, end_year + 1):
             year_data = data_dict.get(f"{year}Q{season}", {}).copy()
             if not year_data:
                 continue
-            for key in list(year_data.keys()):
+            for key, value in list(year_data.items()):
                 if key == "season":
                     continue
-                this_value = self.process_value(year_data[key])
+                this_value = self.process_value(value)
                 if this_value is None:
                     year_data.pop(key)
                     continue
-                temp_dict = {"value": year_data[key]}
+                temp_dict = {"value": value}
                 for shift in year_shifts:
-                    past_year = year - shift
-                    last_value = data_dict.get(f"{past_year}Q{season}", {}).get(key)
-                    last_value = self.process_value(last_value)
-                    growth = self.calculate_growth(this_value, last_value, shift) if last_value is not None else None
+                    past_value = self.process_value(
+                        data_dict.get(f"{year - shift}Q{season}", {}).get(key)
+                    )
+                    growth = self.calculate_growth(this_value, past_value, shift) if past_value else None
+                    temp_dict[
+                        f"YoY_{shift}"
+                    ] = f"{growth:.2f}%" if growth else None
-                    temp_dict[f"YoY_{shift}"] = (f"{growth:.2f}%" if growth else None)
                 year_data[key] = temp_dict
             return_dict[f"{year}Q{season}"] = year_data
         return return_dict
     def cal_QoQ(self, data_dict):
         return_dict = {}
         for time_index, this_data in data_dict.items():
             year, season = map(int, time_index.split("Q"))
-            last_year, last_season = (year - 1, 4) if season == 1 else (year, season - 1)
+            last_year, last_season = (
+                year - 1, 4
+            ) if season == 1 else (year, season - 1)
             for key in list(this_data.keys()):
                 if key == "season":
                     continue
                 this_value = self.process_value(this_data[key])
                 if this_value is None:
                     this_data.pop(key)
                     continue
                 temp_dict = {"value": this_data[key]}
-                last_value = data_dict.get(f"{last_year}Q{last_season}", {}).get(key, {}).get('value')
+                last_value = data_dict.get(
+                    f"{last_year}Q{last_season}",{}
+                ).get(key, {}).get('value')
                 last_value = self.process_value(last_value)
-                growth = self.calculate_growth(this_value, last_value, 1) if last_value is not None else None
+                growth = self.calculate_growth(
+                    this_value, last_value, 1
+                ) if last_value is not None else None
                 temp_dict['growth'] = (f"{growth:.2f}%" if growth else None)
                 this_data[key] = temp_dict
             return_dict[time_index] = this_data
         return return_dict
     def get_dict_of_df(self, data_dict):
         """
         dict[dict] -> dict[df]
         """
-        for key in data_dict.keys():
-            data_dict[key] = pd.DataFrame.from_dict(data_dict[key])
-        return data_dict
+        return {
+            key: pd.DataFrame.from_dict(data) for key, data in data_dict.items()
+        }
     def set_time_shift(self, date: Union[str, datetime], period: str):
         if isinstance(date, str):
@@ -197,4 +212,4 @@ class BaseTEJFetcher(abc.ABC):
         if period == "all":
             return datetime.strptime("1991-01-01", "%Y-%m-%d")
-        return date - period_mapping.get(period, timedelta(days=0))  # 預設為不變"
+        return date - period_mapping.get(period, timedelta(days=0))    # 預設為不變"

neurostats_api-0.0.22/neurostats_API/fetchers/cash_flow.py ADDED Viewed

@@ -0,0 +1,200 @@
+from .base import StatsFetcher, StatsDateTime
+import json
+import numpy as np
+import pandas as pd
+from ..utils import StatsDateTime, StatsProcessor
+import importlib.resources as pkg_resources
+import yaml
+class CashFlowFetcher(StatsFetcher):
+    def __init__(self, ticker, db_client):
+        super().__init__(ticker, db_client)
+        self.cash_flow_dict = StatsProcessor.load_yaml(
+            "twse/cash_flow_percentage.yaml"
+        )  # 計算子表格用
+        self.process_function_map = {
+            "twse_stats": self.process_data_twse,
+            "us_stats": self.process_data_us
+        }
+    def prepare_query(self):
+        pipeline = super().prepare_query()
+        name_map = {
+            "twse_stats": "cash_flow",
+            "us_stats": "cash_flow"
+        }
+        chart_name = name_map.get(self.collection_name, "cash_flow")
+        append_pipeline = [
+            {
+                "$unwind": "$seasonal_data"    # 展開 seasonal_data 陣列
+            },
+            {
+                "$project": {
+                    "_id": 0,
+                    "ticker": 1,
+                    "company_name": 1,
+                    "year": "$seasonal_data.year",
+                    "season": "$seasonal_data.season",
+                    "cash_flow": {
+                        "$ifNull": [f"$seasonal_data.{chart_name}", []]
+                    }    # 避免 null
+                }
+            },
+            {
+                "$sort": {
+                    "year": -1,
+                    "season": -1
+                }
+            }
+        ]
+        pipeline = pipeline + append_pipeline
+        return pipeline
+    def collect_data(self):
+        return super().collect_data()
+    def query_data(self):
+        fetched_data = self.collect_data()
+        process_fn = self.process_function_map.get(self.collection_name, self.process_data_us)
+        return process_fn(fetched_data)
+    def process_data_twse(self, fetched_data):
+        """
+        處理現金流量表頁面的所有表格
+        金流表本身沒有比例 但是Ifa有算，
+        項目所屬的情況也不一(分別所屬營業,投資,籌資三個活動)
+        所以這裡選擇不用slicing處理
+        """
+        index_names = []
+        column_names = []
+        table_dict = dict()
+        CASHO_dict = dict()
+        CASHI_dict = dict()
+        CASHF_dict = dict()
+        # 處理cash_flow 比例
+        checkpoints = ["營業活動之現金流量－間接法", "投資活動之現金流量", "籌資活動之現金流量", "匯率變動對現金及約當現金之影響"]
+        main_cash_flows = [
+            "營業活動之淨現金流入（流出）", "投資活動之淨現金流入（流出）", "籌資活動之淨現金流入（流出）", None
+        ] # 主要的比例對象
+        partial_cash_flows = [CASHO_dict, CASHI_dict, CASHF_dict, dict()]
+        # 作法: dictionary中也有checkpoints，如果出現了就換下一個index去計算
+        for data in fetched_data:
+            year, season, cash_flow = data['year'], data['season'], data['cash_flow']
+            time_index = f"{year}Q{season}"
+            main_cash_flow_name = None
+            partial_cash_flow = None
+            next_checkpoint = 0
+            temp_dict = {}
+            for index_name, cash_flow_value in cash_flow.items():
+                if (next_checkpoint < 3
+                        and index_name == checkpoints[next_checkpoint]): # 找到了主要的變動點
+                    main_cash_flow_name = main_cash_flows[next_checkpoint]
+                    partial_cash_flow = partial_cash_flows[next_checkpoint]
+                    partial_cash_flow[time_index] = {}
+                    next_checkpoint += 1
+                if (isinstance(cash_flow_value, dict)):
+                    value = cash_flow_value.get('value', None)
+                else:
+                    value = cash_flow_value
+                main_value = cash_flow.get(main_cash_flow_name, None)
+                if (isinstance(main_value, dict)):
+                    main_value = main_value.get('value', None)
+                else:
+                    pass
+                try:
+                    ratio = np.round(
+                        (value / main_value) * 100, 2
+                    )
+                    ratio = f"{ratio}%"
+                except:
+                    ratio = None
+                value = StatsProcessor.cal_non_percentage(value, postfix="千元")
+                temp_dict[index_name] = {
+                    "value" : value,
+                    "percentage": ratio
+                }
+                partial_cash_flow[time_index][index_name] = temp_dict[index_name]
+            table_dict[time_index] = temp_dict
+            index_names += list(cash_flow.keys())
+        # 轉成dictionary keys
+        index_names = list(dict.fromkeys(index_names))
+        cash_flow_table = pd.DataFrame(table_dict)
+        cash_flow_table_stats = StatsProcessor.expand_value_percentage(cash_flow_table)
+        CASHO_table = pd.DataFrame(CASHO_dict)
+        CASHO_table = StatsProcessor.expand_value_percentage(CASHO_table)
+        CASHI_table = pd.DataFrame(CASHI_dict)
+        CASHI_table = StatsProcessor.expand_value_percentage(CASHI_table)
+        CASHF_table = pd.DataFrame(CASHF_dict)
+        CASHF_table = StatsProcessor.expand_value_percentage(CASHF_table)
+        for time_index in table_dict.keys():
+            table_dict[time_index] = self.flatten_dict(table_dict[time_index], index_names, target_keys=['value', 'percentage'])
+        cash_flow_flatten = pd.DataFrame.from_dict(table_dict)
+        target_season = fetched_data[0]['season']
+        target_season_column = cash_flow_flatten.columns.str.endswith(f"Q{target_season}")
+        return_dict = {
+            "ticker": self.ticker,
+            "company_name": fetched_data[-1]['company_name'],
+            "cash_flow": cash_flow_table_stats,
+            "CASHO": CASHO_table,
+            "CASHI": CASHI_table,
+            "CASHF": CASHF_table,
+            "cash_flow_all": cash_flow_flatten,
+            "cash_flow_YoY": cash_flow_flatten.loc[:, target_season_column]
+        }
+        return return_dict
+    def process_data_us(self, fetched_data):
+        table_dict = {
+            f"{data['year']}Q{data['season']}": data['cash_flow']
+            for data in fetched_data
+        }
+        cash_flow_df = pd.DataFrame.from_dict(table_dict)
+        latest_season = fetched_data[0]['season']
+        target_season_columns = cash_flow_df.columns.str.endswith(
+            f"Q{latest_season}"
+        )
+        cash_flow_df_YoY = cash_flow_df.loc[:, target_season_columns]
+        return_dict = {
+            "ticker": self.ticker,
+            "company_name": fetched_data[-1]['company_name'],
+            "cash_flow": cash_flow_df,
+            "cash_flow_YoY": cash_flow_df_YoY
+        }
+        return return_dict

neurostats-API 0.0.21__tar.gz → 0.0.22__tar.gz

neurostats-API 0.0.21__tar.gz → 0.0.22__tar.gz