PyPI - neurostats-API - Versions diffs - 0.0.23b1__py3-none-any.whl → 0.0.24__py3-none-any.whl - Mend

neurostats-API 0.0.23b1__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

neurostats_API/__init__.py +1 -1
neurostats_API/fetchers/balance_sheet.py +63 -39
neurostats_API/fetchers/base.py +6 -2
neurostats_API/fetchers/cash_flow.py +91 -70
neurostats_API/fetchers/finance_overview.py +26 -26
neurostats_API/fetchers/institution.py +216 -97
neurostats_API/fetchers/macro_daily_event.py +8 -0
neurostats_API/fetchers/margin_trading.py +121 -94
neurostats_API/fetchers/month_revenue.py +139 -105
neurostats_API/fetchers/profit_lose.py +103 -83
neurostats_API/fetchers/tech.py +25 -27
neurostats_API/fetchers/tej_finance_report.py +101 -69
neurostats_API/fetchers/value_invest.py +32 -12
neurostats_API/tools/tej_db/tej_db_percent_index.yaml +0 -3
neurostats_API/utils/calculate_value.py +5 -2
neurostats_API/utils/data_process.py +12 -6
neurostats_API/utils/logger.py +21 -0
{neurostats_API-0.0.23b1.dist-info → neurostats_API-0.0.24.dist-info}/METADATA +2 -2
neurostats_API-0.0.24.dist-info/RECORD +36 -0
neurostats_API-0.0.23b1.dist-info/RECORD +0 -34
{neurostats_API-0.0.23b1.dist-info → neurostats_API-0.0.24.dist-info}/WHEEL +0 -0
{neurostats_API-0.0.23b1.dist-info → neurostats_API-0.0.24.dist-info}/top_level.txt +0 -0

neurostats_API/fetchers/profit_lose.py CHANGED Viewed

@@ -22,6 +22,16 @@ class ProfitLoseFetcher(StatsFetcher):
             "us_stats": self.process_data_us
         }
+        self.return_keys = [
+            'profit_lose', 'grand_total_profit_lose', 'revenue', 'grand_total_revenue',
+            'gross_profit', 'grand_total_gross_profit', 'gross_profit_percentage',
+            'grand_total_gross_profit_percentage', 'operating_income', 'grand_total_operating_income', 'operating_income_percentage',
+            'grand_total_operating_income_percentage', 'net_income_before_tax', 'grand_total_net_income_before_tax', 'net_income_before_tax_percentage',
+            'grand_total_net_income_before_tax_percentage', 'net_income', 'grand_total_net_income', 'net_income_percentage',
+            'grand_total_income_percentage', 'EPS', 'EPS_growth', 'grand_total_EPS',
+            'grand_total_EPS_growth', 'profit_lose_all', 'profit_lose_YoY'
+        ]
     def prepare_query(self):
         pipeline = super().prepare_query()
@@ -30,28 +40,25 @@ class ProfitLoseFetcher(StatsFetcher):
         chart_name = name_map.get(self.collection_name, "income_statement")
         append_pipeline = [
-            {
-                "$unwind": "$seasonal_data"    # 展開 seasonal_data 陣列
-            },
-            {
-                "$project": {
-                    "_id": 0,
-                    "ticker": 1,
-                    "company_name": 1,
-                    "year": "$seasonal_data.year",
-                    "season": "$seasonal_data.season",
-                    "profit_lose": {
-                        "$ifNull": [f"$seasonal_data.{chart_name}", []]
-                    }    # 避免 null
-                }
-            },
-            {
-                "$sort": {
-                    "year": -1,
-                    "season": -1
+        {
+            "$project": {
+                "_id": 0,
+                "ticker": 1,
+                "company_name": 1,
+                "seasonal_data": {
+                    "$map": {
+                        "input": {"$ifNull": ["$seasonal_data", []]},
+                        "as": "season",
+                        "in": {
+                            "year": "$$season.year",
+                            "season": "$$season.season",
+                            "data": {"$ifNull": [f"$$season.{chart_name}", []]}
+                        }
+                    }
                 }
             }
-        ]
+        }
+    ]
         pipeline = pipeline + append_pipeline
@@ -63,6 +70,8 @@ class ProfitLoseFetcher(StatsFetcher):
     def query_data(self):
         fetched_data = self.collect_data()
+        fetched_data = fetched_data[0]
         process_fn = self.process_function_map.get(
             self.collection_name, self.process_data_us
         )
@@ -79,101 +88,108 @@ class ProfitLoseFetcher(StatsFetcher):
             'latest_target_year',
             StatsDateTime.get_today().year - 1
         )
-        target_season = latest_time.get(
-            'seasonal_data',{}
-        ).get('latest_season', 4)
+        target_season = latest_time.get('seasonal_data',
+                                        {}).get('latest_season', 4)
         return_dict = {
             "ticker": self.ticker,
-            "company_name": fetched_data[-1]['company_name'],
+            "company_name": fetched_data['company_name'],
         }
+        seasonal_data = fetched_data.get('seasonal_data', [])
+        if (not seasonal_data):
+            return_dict.update(self._get_empty_structure())
+            return return_dict
         profit_lose_dict = {
-            f"{data['year']}Q{data['season']}": data['profit_lose']
-            for data in fetched_data
+            f"{data['year']}Q{data['season']}": data['data']
+            for data in seasonal_data
         }
+        profit_lose_dict = YoY_Calculator.cal_QoQ(profit_lose_dict)
         profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict)
         target_season_col = profit_lose_df.columns.str.endswith(
             f"Q{target_season}"
         )
         profit_lose_df = profit_lose_df.loc[:, target_season_col]
-        profit_lose_df = StatsProcessor.expand_value_percentage(
+        old_profit_lose_df = StatsProcessor.expand_value_percentage(
             profit_lose_df
         )
-        value_col = profit_lose_df.columns.str.endswith(f"_value")
-        percentage_col = profit_lose_df.columns.str.endswith(f"_percentage")
-        grand_total_value_col = profit_lose_df.columns.str.endswith(
+        # OLD: 回傳包含value & percentage
+        value_col = old_profit_lose_df.columns.str.endswith(f"_value")
+        percentage_col = old_profit_lose_df.columns.str.endswith(f"_percentage")
+        # OLD: 回傳剔除grand_total
+        grand_total_value_col = old_profit_lose_df.columns.str.endswith(
             f"grand_total_value"
         )
-        grand_total_percentage_col = profit_lose_df.columns.str.endswith(
+        grand_total_percentage_col = old_profit_lose_df.columns.str.endswith(
             f"grand_total_percentage"
         )
-        profit_lose_stats_df = profit_lose_df.loc[:, (
+        old_profit_lose_df = old_profit_lose_df.loc[:, (
             (value_col & ~grand_total_value_col) |
             (percentage_col & ~grand_total_percentage_col)
         )]
-        for time_index, profit_lose in profit_lose_dict.items():
-            # 蒐集整體的keys
-            index_names = list(profit_lose.keys())
-            target_keys = [
-                "value",
-                "percentage",
-                "grand_total",
-                "grand_total_percentage",
-                "YoY_1",
-                "YoY_3",
-                "YoY_5",
-                "YoY_10",
-                "grand_total_YoY_1",
-                "grand_total_YoY_3",
-                "grand_total_YoY_5",
-                "grand_total_YoY_10",
-            ]
-            # flatten dict
-            new_profit_lose = self.flatten_dict(
-                profit_lose, index_names, target_keys
+        for time_index, data_dict in profit_lose_dict.items():
+            profit_lose_dict[time_index] = self.flatten_dict(
+                value_dict=data_dict,
+                indexes=list(data_dict.keys()),
+                target_keys=[
+                    "value", "growth", "percentage", "grand_total",
+                    "grand_total_percentage"
+                ] + [f"YoY_{i}" for i in [1, 3, 5, 10]] +
+                [f"grand_total_YoY_{i}" for i in [1, 3, 5, 10]]
             )
-            profit_lose_dict[time_index] = new_profit_lose
-        profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict)
+        profit_lose_df = pd.DataFrame.from_dict(profit_lose_dict).T
         # EPS的value用元計算
-        eps_index = profit_lose_df.index.str.endswith(
-            "_value"
-        ) & profit_lose_df.index.str.contains("每股盈餘")
-        profit_lose_df.loc[eps_index] = profit_lose_df.loc[
-            eps_index].apply(
-                lambda x: StatsProcessor.cal_non_percentage(x, postfix="元")
-            )
+        eps_index = (
+            profit_lose_df.columns.str.endswith("_value")
+            & profit_lose_df.columns.str.contains("每股盈餘")
+        )
+        eps_copy = profit_lose_df.loc[:, eps_index].copy()
+        eps_mask_index = eps_copy.columns
+        profit_lose_df[eps_mask_index] = profit_lose_df[eps_mask_index].map(
+            lambda x: StatsProcessor.cal_non_percentage(x, postfix="元")
+        )
         # percentage處理
-        percentage_index = profit_lose_df.index.str.endswith("percentage")
-        profit_lose_df.loc[percentage_index] = profit_lose_df.loc[
-            percentage_index].apply(
+        percentage_index = profit_lose_df.columns.str.endswith("percentage")
+        growth_index = profit_lose_df.columns.str.endswith("growth")
+        percentage_mask = (percentage_index | growth_index)
+        percentage_copy = profit_lose_df.loc[:, percentage_mask]
+        percentage_mask_index = percentage_copy.columns
+        profit_lose_df[percentage_mask_index] = profit_lose_df[
+            percentage_mask_index].map(
                 lambda x: StatsProcessor.
                 cal_non_percentage(x, to_str=True, postfix="%")
             )
         # YoY處理: 乘以100
-        YoY_index = profit_lose_df.index.str.contains("YoY")
-        profit_lose_df.loc[YoY_index] = profit_lose_df.loc[
-            YoY_index].apply(lambda x: StatsProcessor.cal_percentage(x))
+        YoY_index = profit_lose_df.columns.str.contains("YoY")
+        YoY_mask = YoY_index
+        YoY_copy = profit_lose_df.loc[:, YoY_mask]
+        YoY_mask_cols = YoY_copy.columns
+        profit_lose_df[YoY_mask_cols] = profit_lose_df[YoY_mask_cols].map(
+            lambda x: StatsProcessor.cal_percentage(x)
+        )
         # 剩下的處理: 乘以千元
         value_index = ~(
-            percentage_index | YoY_index | profit_lose_df.index.isin(eps_index)
+            percentage_index | growth_index | YoY_index | eps_index
         )    # 除了上述以外的 index
-        profit_lose_df.loc[value_index] = profit_lose_df.loc[
-            value_index].apply(
-                lambda x: StatsProcessor.cal_non_percentage(x, postfix="千元")
-            )
-        total_table = profit_lose_df.replace("N/A", None)
+        value_col = profit_lose_df.loc[:, value_index].columns
+        profit_lose_df[value_col] = profit_lose_df[value_col].map(
+            lambda x: StatsProcessor.cal_non_percentage(x, postfix="千元")
+        )
+        total_table = profit_lose_df.replace("N/A", None).T
         # 取特定季度
         target_season_columns = total_table.columns.str.endswith(
@@ -192,12 +208,11 @@ class ProfitLoseFetcher(StatsFetcher):
                     )
                     break
                 except Exception as e:
-                    print(str(e))
                     continue
         return_dict.update(
             {
-                "profit_lose": profit_lose_stats_df,
+                "profit_lose": old_profit_lose_df,
                 "profit_lose_all": total_table.copy(),
                 "profit_lose_YoY": total_table_YoY
             }
@@ -207,8 +222,8 @@ class ProfitLoseFetcher(StatsFetcher):
     def process_data_us(self, fetched_data):
         table_dict = {
-            f"{data['year']}Q{data['season']}": data['profit_lose']
-            for data in fetched_data
+            f"{data['year']}Q{data['season']}": data['data']
+            for data in fetched_data['seasonal_data']
         }
         table_dict = YoY_Calculator.cal_QoQ(table_dict)
@@ -218,16 +233,21 @@ class ProfitLoseFetcher(StatsFetcher):
             table_dict[time_index] = self.flatten_dict(
                 value_dict=data_dict,
                 indexes=list(data_dict.keys()),
-                target_keys=["value", "growth"] +
-                            [f"YoY_{i}" for i in [1, 3, 5, 10]]
+                target_keys=["value", "growth"] +
+                [f"YoY_{i}" for i in [1, 3, 5, 10]]
             )
         # 計算QoQ
         return_dict = {
             "ticker": self.ticker,
-            "company_name": fetched_data[-1]['company_name'],
+            "company_name": fetched_data['company_name'],
             "profit_lose": pd.DataFrame.from_dict(table_dict)
         }
         return return_dict
+    def _get_empty_structure(self):
+        return {
+            key: pd.DataFrame(columns= pd.Index([], name = 'date')) for key in self.return_keys
+        }

neurostats_API/fetchers/tech.py CHANGED Viewed

@@ -138,31 +138,6 @@ class TechFetcher(StatsFetcher):
         )
         return df
-    def conduct_db_search_twse(self):
-        required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
-        match_query = {"ticker" : self.ticker}
-        proj_query = {"_id": 0, "daily_data": 1}
-        full_data = self.twse_collection.find_one(match_query, proj_query)
-        if (not full_data):
-            raise ValueError("No ticker found in database twse_stats")
-        daily_data = full_data.get("daily_data", [])
-        if (not isinstance(daily_data, list)):
-            raise ValueError("No ticker found in database twse_stats")
-        df = pd.DataFrame(daily_data)
-        if not self.has_required_columns(df, required_cols):
-            raise KeyError(f"Missing required columns")
-        df = df[required_cols]
-        df = df.sort_values(by = 'date').drop_duplicates(subset=['date'])
-        return df
     def conduct_db_search_tej(self):
         # 再對TEJ search
@@ -177,7 +152,7 @@ class TechFetcher(StatsFetcher):
         }
         query = {'ticker': self.ticker}
-        ticker_full = self.tej_collection.find_one(query)
+        ticker_full = self.collection.find_one(query)
         if not ticker_full:
             raise ValueError("No ticker found in database")
@@ -215,10 +190,33 @@ class TechFetcher(StatsFetcher):
             missing_cols = ",".join(missing_cols)
             for col in missing_cols:
                 df[col] = pd.NA
-            # raise KeyError(f"Missing required columns")
         return df[required_cols]
+    def conduct_db_search_twse(self):
+        required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
+        match_query = {"ticker" : self.ticker}
+        proj_query = {"_id": 0, "daily_data": 1}
+        full_data = self.twse_collection.find_one(match_query, proj_query)
+        if (not full_data):
+            raise ValueError("No ticker found in database twse_stats")
+        daily_data = full_data.get("daily_data", [])
+        if (not isinstance(daily_data, list)):
+            raise ValueError("No ticker found in database twse_stats")
+        df = pd.DataFrame(daily_data)
+        if not self.has_required_columns(df, required_cols):
+            raise KeyError(f"Missing required columns")
+        df = df[required_cols]
+        df = df.sort_values(by = 'date').drop_duplicates(subset=['date'])
+        return df
 class TechProcessor:

neurostats_API/fetchers/tej_finance_report.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from .base import BaseTEJFetcher
 from datetime import datetime
 from enum import Enum
+import numpy as np
 import pandas as pd
 from pymongo import MongoClient
 from .tech import TechProcessor
@@ -127,6 +128,8 @@ class FinanceReportFetcher(BaseTEJFetcher):
             lower_bound_year, lower_bound_season, report_type, indexes
         )
         fetched_data = self.collection.aggregate(pipeline).to_list()
+        fetched_data = fetched_data[0]
+        fetched_data = fetched_data.get('data', []) if isinstance(fetched_data, dict) else []
         data_dict = self.transform_value(
             StatsProcessor.list_of_dict_to_dict(
@@ -174,11 +177,14 @@ class FinanceReportFetcher(BaseTEJFetcher):
             year_based=True
         )
         fetched_data = self.collection.aggregate(pipeline).to_list()
+        fetched_data = fetched_data[0]
+        fetched_data = fetched_data.get('data', []) if isinstance(fetched_data, dict) else []
         data_dict = self.transform_value(
             StatsProcessor.list_of_dict_to_dict(
-                data_list=fetched_data,
-                keys=["year", "season"],
-                delimeter="Q",
+                data_list=fetched_data,
+                keys=["year", "season"],
+                delimeter="Q",
                 data_key=report_type
             )
         )
@@ -198,9 +204,9 @@ class FinanceReportFetcher(BaseTEJFetcher):
                                   (self.percent_index_list, "%")]:
             process_list = list(set(data_df.index) & set(category))
             if postfix == "%":
-                data_df.loc[process_list] = data_df.loc[process_list].astype(
-                    str
-                ).map(lambda x: f"{x}%")
+                data_df = data_df.T
+                data_df[process_list] = data_df[process_list].map(lambda x: f"{x}%") # if (not np.isnan(x)) else None)
+                data_df = data_df.T
             else:
                 data_df.loc[process_list] = data_df.loc[process_list].map(
                     lambda x: StatsProcessor.
@@ -221,67 +227,80 @@ class FinanceReportFetcher(BaseTEJFetcher):
         indexes,
         year_based=False
     ):
-        project_stage = {
-            "_id": 0,
-            "data.year": 1,
-            "data.season": 1,
-            **{
-                f"data.{report_type}.{idx}": 1
-                for idx in indexes
-            }
-        } if indexes else {
-            "_id": 0,
-            "data.year": 1,
-            "data.season": 1,
-            f"data.{report_type}": 1
-        }
-        if (year_based):
-            match_stage = {
-                "data.year": {
-                    "$in": start_year
-                } if year_based else {
-                    "$gt": start_year,
-                    "$lt": end_year
-                },
-                "data.season": start_season
+        if year_based:
+            filter_cond = {
+                "$and": [
+                    { "$in": ["$$item.year", start_year] },
+                    { "$eq": ["$$item.season", start_season] }
+                ]
             }
         else:
-            match_stage = {
+            filter_cond = {
                 "$or": [
                     {
-                        "data.year": {
-                            "$gt": start_year,
-                            "$lt": end_year
-                        }
-                    }, {
-                        "data.year": start_year,
-                        "data.season": {
-                            "$gte": start_season
-                        }
-                    }, {
-                        "data.year": end_year,
-                        "data.season": {
-                            "$lte": end_season
-                        }
-                    }, {
-                        "data.year": lower_bound_year,
-                        "data.season": lower_bound_season
+                        "$and": [
+                            { "$gt": ["$$item.year", start_year] },
+                            { "$lt": ["$$item.year", end_year] }
+                        ]
+                    },
+                    {
+                        "$and": [
+                            { "$eq": ["$$item.year", start_year] },
+                            { "$gte": ["$$item.season", start_season] }
+                        ]
+                    },
+                    {
+                        "$and": [
+                            { "$eq": ["$$item.year", end_year] },
+                            { "$lte": ["$$item.season", end_season] }
+                        ]
+                    },
+                    {
+                        "$and": [
+                            { "$eq": ["$$item.year", lower_bound_year] },
+                            { "$eq": ["$$item.season", lower_bound_season] }
+                        ]
                     }
                 ]
             }
+        # 每個 filtered item 要輸出哪些欄位
+        item_fields = {
+            "year": "$$item.year",
+            "season": "$$item.season"
+        }
+        if indexes:
+            for idx in indexes:
+                item_fields[idx] = f"$$item.{report_type}.{idx}"
+        else:
+            item_fields[report_type] = f"$$item.{report_type}"
         return [
             {
                 "$match": {
                     "ticker": ticker
                 }
-            }, {
-                "$unwind": "$data"
-            }, {
-                "$match": match_stage
-            }, {
-                "$project": project_stage
+            },
+            {
+                "$project": {
+                    "_id": 0,
+                    "ticker": 1,
+                    "data": {
+                        "$map": {
+                            "input": {
+                                "$filter": {
+                                    "input": "$data",
+                                    "as": "item",
+                                    "cond": filter_cond
+                                }
+                            },
+                            "as": "item",
+                            "in": item_fields
+                        }
+                    }
+                }
             }
         ]
@@ -330,7 +349,10 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
         if (period is not None):
             latest_date = self.get_latest_data_time(ticker)
-            start_date = self.set_time_shift(date=latest_date, period=period)
+            if (latest_date):
+                start_date = self.set_time_shift(date=latest_date, period=period)
+            else:
+                start_date = datetime.strptime(start_date, "%Y-%m-%d")
         else:
             start_date = datetime.strptime(start_date, "%Y-%m-%d")
@@ -339,26 +361,36 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
                 "$match": {
                     "ticker": ticker
                 }
-            }, {
-                "$unwind": "$data"
-            }, {
-                "$match": {
-                    "data.mdate": {
-                        "$gt": start_date
-                    }
-                }
-            }, {
+            },
+            {
                 "$project": {
+                    "_id": 0,
                     "ticker": 1,
-                    "data": 1,
-                    "_id": 0
+                    "data": {
+                        "$filter": {
+                            "input": "$data",
+                            "as": "item",
+                            "cond": {
+                                "$gte": ["$$item.mdate", start_date]
+                            }
+                        }
+                    }
                 }
             }
         ]
         datas = self.collection.aggregate(pipeline).to_list()
-        elements = [element['data'] for element in datas]
-        data_df = pd.DataFrame(elements).set_index('mdate')
+        datas = datas[0]
+        datas = datas.get('data', [])
+        elements = [element for element in datas]
+        try:
+            data_df = pd.DataFrame(elements).set_index('mdate')
+        except:
+            column_names = [
+                "coid", "mdate", "mkt", "open_d", "high_d", "low_d", "close_d",
+                "adjfac", "vol", "amt", "trn", "bid", "offer", "avgprc", "roi",
+                "hmlpct", "turnover", "shares", "mktcap", "mktcap_pct",
+                "amt_pct", "per", "pbr", "div_yid", "cdiv_yid"
+            ]
+            data_df = pd.DataFrame(columns = column_names)
         return data_df

neurostats-API 0.0.23b1__py3-none-any.whl → 0.0.24__py3-none-any.whl

neurostats-API 0.0.23b1__py3-none-any.whl → 0.0.24__py3-none-any.whl