PyPI - neurostats-API - Versions diffs - 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl - Mend

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

neurostats_API/__init__.py +1 -1
neurostats_API/fetchers/balance_sheet.py +152 -102
neurostats_API/fetchers/base.py +93 -74
neurostats_API/fetchers/cash_flow.py +143 -113
neurostats_API/fetchers/finance_overview.py +28 -28
neurostats_API/fetchers/institution.py +211 -97
neurostats_API/fetchers/margin_trading.py +121 -94
neurostats_API/fetchers/month_revenue.py +139 -105
neurostats_API/fetchers/profit_lose.py +203 -108
neurostats_API/fetchers/tech.py +117 -42
neurostats_API/fetchers/tej_finance_report.py +248 -338
neurostats_API/fetchers/value_invest.py +32 -12
neurostats_API/tools/company_list/tw.json +2175 -0
neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -3
neurostats_API/tools/tej_db/tej_db_skip_index.yaml +14 -1
neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -5
neurostats_API/utils/__init__.py +0 -1
neurostats_API/utils/calculate_value.py +102 -1
neurostats_API/utils/data_process.py +53 -19
neurostats_API/utils/logger.py +21 -0
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/METADATA +2 -2
neurostats_API-0.0.23.dist-info/RECORD +35 -0
neurostats_API/utils/fetcher.py +0 -1056
neurostats_API-0.0.21b0.dist-info/RECORD +0 -34
/neurostats_API/tools/{balance_sheet.yaml → twse/balance_sheet.yaml} +0 -0
/neurostats_API/tools/{cash_flow_percentage.yaml → twse/cash_flow_percentage.yaml} +0 -0
/neurostats_API/tools/{finance_overview_dict.yaml → twse/finance_overview_dict.yaml} +0 -0
/neurostats_API/tools/{profit_lose.yaml → twse/profit_lose.yaml} +0 -0
/neurostats_API/tools/{seasonal_data_field_dict.txt → twse/seasonal_data_field_dict.txt} +0 -0
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/WHEEL +0 -0
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23.dist-info}/top_level.txt +0 -0

neurostats_API/fetchers/profit_lose.py CHANGED Viewed

@@ -3,11 +3,10 @@ import importlib.resources as pkg_resources
 import json
 import numpy as np
 import pandas as pd
-from ..utils import StatsDateTime, StatsProcessor
+from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
 import yaml
 class ProfitLoseFetcher(StatsFetcher):
     """
     iFa.ai: 財務分析 -> 損益表
@@ -16,143 +15,239 @@ class ProfitLoseFetcher(StatsFetcher):
     def __init__(self, ticker, db_client):
         super().__init__(ticker, db_client)
-        self.table_settings = StatsProcessor.load_yaml("profit_lose.yaml")
+        self.table_settings = StatsProcessor.load_yaml("twse/profit_lose.yaml")
+        self.process_function_map = {
+            "twse_stats": self.process_data_twse,
+            "us_stats": self.process_data_us
+        }
-    def prepare_query(self, target_season):
+        self.return_keys = [
+            'profit_lose', 'grand_total_profit_lose', 'revenue', 'grand_total_revenue',
+            'gross_profit', 'grand_total_gross_profit', 'gross_profit_percentage',
+            'grand_total_gross_profit_percentage', 'operating_income', 'grand_total_operating_income', 'operating_income_percentage',
+            'grand_total_operating_income_percentage', 'net_income_before_tax', 'grand_total_net_income_before_tax', 'net_income_before_tax_percentage',
+            'grand_total_net_income_before_tax_percentage', 'net_income', 'grand_total_net_income', 'net_income_percentage',
+            'grand_total_income_percentage', 'EPS', 'EPS_growth', 'grand_total_EPS',
+            'grand_total_EPS_growth', 'profit_lose_all', 'profit_lose_YoY'
+        ]
+    def prepare_query(self):
         pipeline = super().prepare_query()
-        pipeline.append({
+        name_map = {"twse_stats": "profit_lose", "us_stats": "income_statement"}
+        chart_name = name_map.get(self.collection_name, "income_statement")
+        append_pipeline = [
+        {
             "$project": {
                 "_id": 0,
                 "ticker": 1,
                 "company_name": 1,
-                "profit_loses": {
-                    "$sortArray": {
-                        "input": {
-                            "$map": {
-                                "input": {
-                                    "$filter": {
-                                        "input": "$seasonal_data",
-                                        "as": "season",
-                                        "cond": {
-                                            "$eq":
-                                            ["$$season.season", target_season]
-                                        }
-                                    }
-                                },
-                                "as": "target_season_data",
-                                "in": {
-                                    "year":
-                                    "$$target_season_data.year",
-                                    "season":
-                                    "$$target_season_data.season",
-                                    "profit_lose":
-                                    "$$target_season_data.profit_lose"
-                                }
-                            }
-                        },
-                        "sortBy": {
-                            "year": -1
-                        }  # 按 year 降序排序
+                "seasonal_data": {
+                    "$map": {
+                        "input": {"$ifNull": ["$seasonal_data", []]},
+                        "as": "season",
+                        "in": {
+                            "year": "$$season.year",
+                            "season": "$$season.season",
+                            "data": {"$ifNull": [f"$$season.{chart_name}", []]}
+                        }
                     }
                 }
             }
-        })
-        return pipeline
+        }
+    ]
-    def collect_data(self, target_season):
-        pipeline = self.prepare_query(target_season)
+        pipeline = pipeline + append_pipeline
-        fetched_data = self.collection.aggregate(pipeline)
+        return pipeline
-        return list(fetched_data)[-1]
+    def collect_data(self):
+        return super().collect_data()
     def query_data(self):
-        try:
-            latest_time = StatsDateTime.get_latest_time(
-                self.ticker, self.collection)['last_update_time']
-            target_season = latest_time['seasonal_data']['latest_season']
-        except Exception as e:
-            today = StatsDateTime.get_today()
-            target_season = today.season
-            target_season = target_season - 1 if target_season > 1 else 4
+        fetched_data = self.collect_data()
-        fetched_data = self.collect_data(target_season)
+        fetched_data = fetched_data[0]
-        return self.process_data(fetched_data, target_season)
+        process_fn = self.process_function_map.get(
+            self.collection_name, self.process_data_us
+        )
+        return process_fn(fetched_data)
-    def process_data(self, fetched_data, target_season):
+    def process_data_twse(self, fetched_data):
-        profit_loses = fetched_data['profit_loses']
+        latest_time = StatsDateTime.get_latest_time(
+            self.ticker, self.collection
+        ).get('last_update_time', {})
-        index_names = []
-        table_dict = dict()
-        grand_total_dict = dict()
+        # 取最新時間資料時間，沒取到就預設去年年底
+        target_year = latest_time.get('seasonal_data', {}).get(
+            'latest_target_year',
+            StatsDateTime.get_today().year - 1
+        )
+        target_season = latest_time.get('seasonal_data',
+                                        {}).get('latest_season', 4)
         return_dict = {
-            "ticker": fetched_data['ticker'],
+            "ticker": self.ticker,
             "company_name": fetched_data['company_name'],
         }
-        for data in profit_loses:
-            year = data['year']
-            time_index = f"{year}Q{target_season}"
-            # 蒐集整體的keys
-            index_names += list(data['profit_lose'].keys())
-            profit_lose = data['profit_lose']
-            for index_name, value_dict in profit_lose.items():
-                # (2020Q1, 項目, 金額或%)
-                for item_name, item in value_dict.items():
-                    if ('percentage' in item_name):
-                        if (isinstance(item, (float, int))):
-                            item = StatsProcessor.cal_non_percentage(item, to_str=True, postfix="%")
-                    elif ('YoY' in item_name):
-                        if (isinstance(item, (float, int))):
-                            item = StatsProcessor.cal_percentage(item)
-                    elif ('每股盈餘' in index_name):
-                        if (isinstance(item, (float, int))):
-                            item = StatsProcessor.cal_non_percentage(item,  postfix="元")
-                    else:
-                        if (isinstance(item, (float, int))):
-                            item = StatsProcessor.cal_non_percentage(item,  postfix="千元")
-                    try:
-                        table_dict[index_name][(time_index, item_name)] = item
-                    except KeyError:
-                        if (index_name not in table_dict.keys()):
-                            table_dict[index_name] = dict()
-                            grand_total_dict[index_name] = dict()
-                        table_dict[index_name][(time_index, item_name)] = item
-        total_table = pd.DataFrame.from_dict(table_dict, orient='index')
-        total_table.columns = pd.MultiIndex.from_tuples(total_table.columns)
-        total_table = total_table.replace("N/A", None)
+        seasonal_data = fetched_data.get('seasonal_data', [])
+        if (not seasonal_data):
+            return_dict.update(self._get_empty_structure())
+            return return_dict
+        profit_lose_dict = {
+            f"{data['year']}Q{data['season']}": data['data']
+            for data in seasonal_data
+        }
+        profit_lose_dict = YoY_Calculator.cal_QoQ(profit_lose_dict)
+        profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict)
+        target_season_col = profit_lose_df.columns.str.endswith(
+            f"Q{target_season}"
+        )
+        profit_lose_df = profit_lose_df.loc[:, target_season_col]
+        old_profit_lose_df = StatsProcessor.expand_value_percentage(
+            profit_lose_df
+        )
+        # OLD: 回傳包含value & percentage
+        value_col = old_profit_lose_df.columns.str.endswith(f"_value")
+        percentage_col = old_profit_lose_df.columns.str.endswith(f"_percentage")
+        # OLD: 回傳剔除grand_total
+        grand_total_value_col = old_profit_lose_df.columns.str.endswith(
+            f"grand_total_value"
+        )
+        grand_total_percentage_col = old_profit_lose_df.columns.str.endswith(
+            f"grand_total_percentage"
+        )
+        old_profit_lose_df = old_profit_lose_df.loc[:, (
+            (value_col & ~grand_total_value_col) |
+            (percentage_col & ~grand_total_percentage_col)
+        )]
+        for time_index, data_dict in profit_lose_dict.items():
+            profit_lose_dict[time_index] = self.flatten_dict(
+                value_dict=data_dict,
+                indexes=list(data_dict.keys()),
+                target_keys=[
+                    "value", "growth", "percentage", "grand_total",
+                    "grand_total_percentage"
+                ] + [f"YoY_{i}" for i in [1, 3, 5, 10]] +
+                [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
+            )
+        profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict).T
+        # EPS的value用元計算
+        eps_index = (
+            profit_lose_df.columns.str.endswith("_value")
+            & profit_lose_df.columns.str.contains("每股盈餘")
+        )
+        eps_copy = profit_lose_df.loc[:, eps_index].copy()
+        eps_mask_index = eps_copy.columns
+        profit_lose_df[eps_mask_index] = profit_lose_df[eps_mask_index].map(
+            lambda x: StatsProcessor.cal_non_percentage(x, postfix="元")
+        )
+        # percentage處理
+        percentage_index = profit_lose_df.columns.str.endswith("percentage")
+        growth_index = profit_lose_df.columns.str.endswith("growth")
+        percentage_mask = (percentage_index | growth_index)
+        percentage_copy = profit_lose_df.loc[:, percentage_mask]
+        percentage_mask_index = percentage_copy.columns
+        profit_lose_df[percentage_mask_index] = profit_lose_df[
+            percentage_mask_index].map(
+                lambda x: StatsProcessor.
+                cal_non_percentage(x, to_str=True, postfix="%")
+            )
+        # YoY處理: 乘以100
+        YoY_index = profit_lose_df.columns.str.contains("YoY")
+        YoY_mask = YoY_index
+        YoY_copy = profit_lose_df.loc[:, YoY_mask]
+        YoY_mask_cols = YoY_copy.columns
+        profit_lose_df[YoY_mask_cols] = profit_lose_df[YoY_mask_cols].map(
+            lambda x: StatsProcessor.cal_percentage(x)
+        )
+        # 剩下的處理: 乘以千元
+        value_index = ~(
+            percentage_index | growth_index | YoY_index | eps_index
+        )    # 除了上述以外的 index
+        value_col = profit_lose_df.loc[:, value_index].columns
+        profit_lose_df[value_col] = profit_lose_df[value_col].map(
+            lambda x: StatsProcessor.cal_non_percentage(x, postfix="千元")
+        )
+        total_table = profit_lose_df.replace("N/A", None).T
+        # 取特定季度
+        target_season_columns = total_table.columns.str.endswith(
+            f"Q{target_season}"
+        )
+        total_table_YoY = total_table.loc[:, target_season_columns]
         for name, setting in self.table_settings.items():
-            if ('target_index' in setting.keys()):
-                target_indexes = [target.strip() for target in setting['target_index']]
-            else:
-                target_indexes = [None]
+            target_indexes = setting.get('target_index', [None])
             for target_index in target_indexes:
                 try:
-                    return_dict[name] = StatsProcessor.slice_multi_col_table(
-                        total_table=total_table,
+                    return_dict[name] = StatsProcessor.slice_table(
+                        total_table=total_table_YoY,
                         mode=setting['mode'],
-                        target_index=target_index)
+                        target_index=target_index
+                    )
                     break
                 except Exception as e:
-                    return_dict[name] = StatsProcessor.slice_multi_col_table(
-                        total_table=total_table,
-                        mode=setting['mode'],
-                        target_index=target_index)
+                    continue
+        return_dict.update(
+            {
+                "profit_lose": old_profit_lose_df,
+                "profit_lose_all": total_table.copy(),
+                "profit_lose_YoY": total_table_YoY
+            }
+        )
+        return return_dict
+    def process_data_us(self, fetched_data):
+        table_dict = {
+            f"{data['year']}Q{data['season']}": data['profit_lose']
+            for data in fetched_data
+        }
+        table_dict = YoY_Calculator.cal_QoQ(table_dict)
+        table_dict = YoY_Calculator.cal_YoY(table_dict)
+        for time_index, data_dict in table_dict.items():
+            table_dict[time_index] = self.flatten_dict(
+                value_dict=data_dict,
+                indexes=list(data_dict.keys()),
+                target_keys=["value", "growth"] +
+                [f"YoY_{i}" for i in [1, 3, 5, 10]]
+            )
+        # 計算QoQ
+        return_dict = {
+            "ticker": self.ticker,
+            "company_name": fetched_data[-1]['company_name'],
+            "profit_lose": pd.DataFrame.from_dict(table_dict)
+        }
         return return_dict
+    def _get_empty_structure(self):
+        return {
+            key: pd.DataFrame(columns= pd.Index([], name = 'date')) for key in self.return_keys
+        }

neurostats_API/fetchers/tech.py CHANGED Viewed

@@ -12,7 +12,10 @@ class TechFetcher(StatsFetcher):
         """
         super().__init__(ticker, db_client)
-        self.collection = self.db["TWN/APIPRCD"]
+        if (ticker in self.tw_company_list.keys()):
+            self.twse_collection = self.db['twse_stats']
+            self.tej_collection = self.db["TWN/APIPRCD"]
         self.full_ohlcv = self._get_ohlcv()
         self.basic_indexes = [
             'SMA5', 'SMA20', 'SMA60', 'EMA5', 'EMA20', 'EMA40', 'EMA12',
@@ -51,53 +54,47 @@ class TechFetcher(StatsFetcher):
         required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
-        try:
-            # 先對yf search
-            if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
-                full_tick = f'^{self.ticker}'
-            else:
-                full_tick = f'{self.ticker}.tw'
+        if self.ticker in ['GSPC', 'IXIC', 'DJI', 'TWII']:
+            full_tick = f'^{self.ticker}'
             df = self.conduct_yf_search(full_tick)
-            if not self.has_required_columns(df, required_cols):
-                print(f".tw failed, try .two")
-                full_tick = f'{self.ticker}.two'
-                df = self.conduct_yf_search(full_tick)
+            return df[required_cols]
-                if (df.empty):
-                    raise ValueError(f"No data found for ticker: {self.ticker}")
-        except (KeyError, ValueError, TypeError) as e:
-            # 再對TEJ search
-            tej_required_cols = [
-                "mdate", "open_d", 'high_d', 'low_d', 'close_d', 'vol'
+        elif(self.ticker in self.tw_company_list.keys()):
+            search_fns = [
+                self.conduct_db_search_twse,
+                self.conduct_db_search_tej,
+                lambda: self.conduct_yf_search(f'{self.ticker}.tw'),
+                lambda: self.conduct_yf_search(f'{self.ticker}.two')
             ]
-            tej_name_proj = {
-                tej_name: org_name
-                for tej_name, org_name in zip(tej_required_cols, required_cols)
-            }
-            query = {'ticker': self.ticker}
-            ticker_full = self.collection.find_one(query)
-            if not ticker_full:
-                raise ValueError("No ticker found in database")
-            daily_data = ticker_full.get("data", [])
-            if not isinstance(daily_data, list):
-                raise TypeError("Expected 'daily_data' to be a list.")
-            df = pd.DataFrame(daily_data)
-            if not self.has_required_columns(df, tej_required_cols):
-                raise KeyError(f"Missing required columns")
-            df = df.rename(columns=tej_name_proj)
+            for search_method in search_fns:
+                try:
+                    df = search_method()
+                    break
+                except (KeyError, ValueError, TypeError):
+                    continue
+            else:
+                return pd.DataFrame(columns=required_cols)
+            # break跳出後
+            return df[required_cols]
+        else:  # 美股
+            search_fns = [
+                self.conduct_db_search_us,
+                lambda : self.conduct_yf_search(f"{self.ticker}")
+            ]
+            for search_method in search_fns:
+                try:
+                    df = search_method()
+                    break
+                except (KeyError, ValueError, TypeError):
+                    continue
+            else:
+                df = pd.DataFrame()
-        return df[required_cols]
+            return df
     def get_daily(self):
@@ -141,7 +138,85 @@ class TechFetcher(StatsFetcher):
         )
         return df
+    def conduct_db_search_tej(self):
+        # 再對TEJ search
+        tej_required_cols = [
+            "mdate", "open_d", 'high_d', 'low_d', 'close_d', 'vol'
+        ]
+        required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
+        tej_name_proj = {
+            tej_name: org_name
+            for tej_name, org_name in zip(tej_required_cols, required_cols)
+        }
+        query = {'ticker': self.ticker}
+        ticker_full = self.collection.find_one(query)
+        if not ticker_full:
+            raise ValueError("No ticker found in database")
+        daily_data = ticker_full.get("data", [])
+        if not isinstance(daily_data, list):
+            raise TypeError("Expected 'daily_data' to be a list.")
+        df = pd.DataFrame(daily_data)
+        if not self.has_required_columns(df, tej_required_cols):
+            raise KeyError(f"Missing required columns")
+        df = df.rename(columns=tej_name_proj)
+        return df[required_cols]
+    def conduct_db_search_us(self):
+        required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
+        query = {'ticker': self.ticker}
+        filter_query = {"daily_data" : 1, "_id": 0}
+        ticker_full = self.collection.find_one(query, filter_query)
+        if not ticker_full:
+            raise ValueError("No ticker found in database")
+        daily_data = ticker_full.get("daily_data", [])
+        if not isinstance(daily_data, list):
+            raise TypeError("Expected 'daily_data' to be a list.")
+        df = pd.DataFrame(daily_data)
+        if not self.has_required_columns(df, required_cols):
+            missing_cols = [col for col in required_cols if col not in df.columns]
+            missing_cols = ",".join(missing_cols)
+            for col in missing_cols:
+                df[col] = pd.NA
+        return df[required_cols]
+    def conduct_db_search_twse(self):
+        required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
+        match_query = {"ticker" : self.ticker}
+        proj_query = {"_id": 0, "daily_data": 1}
+        full_data = self.twse_collection.find_one(match_query, proj_query)
+        if (not full_data):
+            raise ValueError("No ticker found in database twse_stats")
+        daily_data = full_data.get("daily_data", [])
+        if (not isinstance(daily_data, list)):
+            raise ValueError("No ticker found in database twse_stats")
+        df = pd.DataFrame(daily_data)
+        if not self.has_required_columns(df, required_cols):
+            raise KeyError(f"Missing required columns")
+        df = df[required_cols]
+        df = df.sort_values(by = 'date').drop_duplicates(subset=['date'])
+        return df
 class TechProcessor:

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23__py3-none-any.whl