PyPI - neurostats-API - Versions diffs - 0.0.21b0__py3-none-any.whl → 0.0.23b0__py3-none-any.whl - Mend

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

neurostats_API/__init__.py +1 -1
neurostats_API/fetchers/balance_sheet.py +138 -111
neurostats_API/fetchers/base.py +89 -74
neurostats_API/fetchers/cash_flow.py +120 -111
neurostats_API/fetchers/finance_overview.py +2 -2
neurostats_API/fetchers/month_revenue.py +1 -1
neurostats_API/fetchers/profit_lose.py +188 -113
neurostats_API/fetchers/tech.py +175 -42
neurostats_API/fetchers/tej_finance_report.py +230 -335
neurostats_API/tools/company_list/tw.json +2175 -0
neurostats_API/tools/tej_db/tej_db_skip_index.yaml +3 -1
neurostats_API/tools/tej_db/tej_db_thousand_index.yaml +0 -1
neurostats_API/utils/__init__.py +0 -1
neurostats_API/utils/calculate_value.py +99 -1
neurostats_API/utils/data_process.py +43 -15
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/METADATA +2 -2
neurostats_API-0.0.23b0.dist-info/RECORD +34 -0
neurostats_API/utils/fetcher.py +0 -1056
neurostats_API-0.0.21b0.dist-info/RECORD +0 -34
/neurostats_API/tools/{balance_sheet.yaml → twse/balance_sheet.yaml} +0 -0
/neurostats_API/tools/{cash_flow_percentage.yaml → twse/cash_flow_percentage.yaml} +0 -0
/neurostats_API/tools/{finance_overview_dict.yaml → twse/finance_overview_dict.yaml} +0 -0
/neurostats_API/tools/{profit_lose.yaml → twse/profit_lose.yaml} +0 -0
/neurostats_API/tools/{seasonal_data_field_dict.txt → twse/seasonal_data_field_dict.txt} +0 -0
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/WHEEL +0 -0
{neurostats_API-0.0.21b0.dist-info → neurostats_API-0.0.23b0.dist-info}/top_level.txt +0 -0

neurostats_API/fetchers/tej_finance_report.py CHANGED Viewed

@@ -18,34 +18,39 @@ class FinanceReportFetcher(BaseTEJFetcher):
         QOQ_NOCAL = 4
     def __init__(
-            self,
-            mongo_uri,
-            db_name="company",
-            collection_name="TWN/AINVFQ1"
-        ):
+        self, mongo_uri, db_name="company", collection_name="TWN/AINVFQ1"
+    ):
         self.client = MongoClient(mongo_uri)
         self.db = self.client[db_name]
         self.collection = self.db[collection_name]
-        index_dict = StatsProcessor.load_yaml("tej_db/tej_db_index.yaml")
-        thousand_dict = StatsProcessor.load_yaml("tej_db/tej_db_thousand_index.yaml")
-        percent_dict = StatsProcessor.load_yaml("tej_db/tej_db_percent_index.yaml")
-        skip_dict = StatsProcessor.load_yaml("tej_db/tej_db_percent_index.yaml")
-        self.check_index = set(index_dict[collection_name])
-        self.skip_index = set(skip_dict[collection_name])
+        index_files = [
+            "tej_db/tej_db_index.yaml", "tej_db/tej_db_thousand_index.yaml",
+            "tej_db/tej_db_percent_index.yaml"
+        ]
-        self.thousand_index_list = list(thousand_dict[collection_name])
-        self.percent_index_list = list(percent_dict[collection_name])
+        self.index_dict, self.thousand_dict, self.percent_dict = [
+            StatsProcessor.load_yaml(file) for file in index_files
+        ]
+        self.check_index = set(self.index_dict.get(collection_name, []))
+        self.skip_index = set(self.percent_dict.get(collection_name, []))
+        self.thousand_index_list = list(
+            self.thousand_dict.get(collection_name, [])
+        )
+        self.percent_index_list = list(
+            self.percent_dict.get(collection_name, [])
+        )
     def get(
-            self,
-            ticker,
-            fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
-            start_date: str = None,
-            end_date: str = None,
-            report_type: str = "Q",
-            indexes: list = []):
+        self,
+        ticker,
+        fetch_mode: FetchMode = FetchMode.QOQ_NOCAL,
+        start_date: str = None,
+        end_date: str = None,
+        report_type: str = "Q",
+        indexes: list = []
+    ):
         """
         基礎的query function
         ticker(str): 股票代碼
@@ -58,351 +63,239 @@ class FinanceReportFetcher(BaseTEJFetcher):
         indexes(List): 指定的index
         """
         # 確認indexes中是否有錯誤的index，有的話回傳warning
-        if (indexes and self.check_index):
-            indexes = set(indexes)
-            difference = indexes - self.check_index
-            if (difference):
+        if indexes and self.check_index:
+            invalid_indexes = set(indexes) - self.check_index
+            if invalid_indexes:
                 warnings.warn(
-                    f"{list(difference)} 沒有出現在資料表中，請確認column名稱是否正確",
-                    UserWarning)
-        if (not start_date):
-            start_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
-        else:
-            start_date = datetime.strptime(start_date, "%Y-%m-%d")
-        if (fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}):
-            if (not end_date):
-                end_date = datetime.today()
-            else:
-                end_date = datetime.strptime(end_date, "%Y-%m-%d")
-            assert (start_date <= end_date)
-            start_year = start_date.year
-            start_season = (start_date.month - 1) // 4 + 1
-            end_year = end_date.year
-            end_season = (end_date.month - 1) // 4 + 1
-            if (fetch_mode == self.FetchMode.QOQ):
-                use_cal = True
-            else:
-                use_cal = False
-            data_df = self.get_QoQ_data(
+                    f"{list(invalid_indexes)} 不存在，請確認欄位名稱", UserWarning
+                )
+        start_date = datetime.strptime(
+            start_date, "%Y-%m-%d"
+        ) if start_date else datetime(2005, 1, 1)
+        if fetch_mode in {self.FetchMode.QOQ, self.FetchMode.QOQ_NOCAL}:
+            end_date = datetime.strptime(end_date, "%Y-%m-%d"
+                                         ) if end_date else datetime.today()
+            assert start_date <= end_date
+            start_year, end_year = start_date.year, end_date.year
+            return self.get_QoQ_data(
                 ticker=ticker,
-                start_year=start_year,
-                start_season=start_season,
-                end_year=end_year,
-                end_season=end_season,
+                start_date=start_date,
+                end_date=end_date,
                 report_type=report_type,
                 indexes=indexes,
-                use_cal=use_cal)
-            return data_df
-        elif (fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}):
-            start_year = start_date.year
-            end_date = self.get_latest_data_time(ticker)
-            if (not end_date):
-                end_date = datetime.today()
-            end_year = end_date.year
-            season = (end_date.month - 1) // 4 + 1
+                use_cal=(fetch_mode == self.FetchMode.QOQ)
+            )
-            if (fetch_mode == self.FetchMode.YOY):
-                use_cal = True
-            else:
-                use_cal = False
-            data_df = self.get_YoY_data(
+        elif fetch_mode in {self.FetchMode.YOY, self.FetchMode.YOY_NOCAL}:
+            end_date = self.get_latest_data_time(ticker) or datetime.today()
+            start_year, end_year = start_date.year, end_date.year
+            end_season = (end_date.month - 1) // 4 + 1
+            return self.get_YoY_data(
                 ticker=ticker,
                 start_year=start_year,
                 end_year=end_year,
-                season=season,
+                season=end_season,
                 report_type=report_type,
                 indexes=indexes,
-                use_cal=use_cal)
-            return data_df
+                use_cal=(fetch_mode == self.FetchMode.YOY)
+            )
     def get_QoQ_data(
-            self,
-            ticker,
-            start_year,
-            start_season,
-            end_year,
-            end_season,
-            report_type="Q",
-            indexes=[],
-            use_cal=False):
+        self,
+        ticker,
+        start_date,
+        end_date,
+        report_type="Q",
+        indexes=[],
+        use_cal=False
+    ):
         """
             取得時間範圍內每季資料
         """
-        if (use_cal):
-            if (start_season == 1):
-                lower_bound_year = start_year - 1
-                lower_bound_season = 4
-            else:
-                lower_bound_year = start_year
-                lower_bound_season = start_season - 1
-        else:
-            lower_bound_year = start_year,
-            lower_bound_season = start_season
-        if (not indexes):    # 沒有指定 -> 取全部
-            pipeline = [
-                {
-                    "$match": {
-                        "ticker": ticker
-                    }
-                }, {
-                    "$unwind": "$data"
-                }, {
-                    "$match": {
-                        "$or": [
-                            {
-                                "data.year": {
-                                    "$gt": start_year,
-                                    "$lt": end_year
-                                }
-                            }, {
-                                "data.year": start_year,
-                                "data.season": {
-                                    "$gte": start_season
-                                }
-                            }, {
-                                "data.year": end_year,
-                                "data.season": {
-                                    "$lte": end_season
-                                }
-                            }, {
-                                "data.year": lower_bound_year,
-                                "data.season": lower_bound_season
-                            }
-                        ]
-                    }
-                }, {
-                    "$project": {
-                        "data.year": 1,
-                        "data.season": 1,
-                        f"data.{report_type}": 1,
-                        "_id": 0
-                    }
-                }
-            ]
-        else:    # 取指定index
-            project_stage = {"data.year": 1, "data.season": 1}
-            for index in indexes:
-                project_stage[f"data.{report_type}.{index}"] = 1
+        start_year, start_season = start_date.year, (
+            start_date.month - 1
+        ) // 4 + 1
+        end_year, end_season = end_date.year, (end_date.month - 1) // 4 + 1
+        lower_bound_year, lower_bound_season = (
+            start_year - 1, 4
+        ) if start_season == 1 else (start_year, start_season - 1)
+        pipeline = self.build_pipeline(
+            ticker, start_year, start_season, end_year, end_season,
+            lower_bound_year, lower_bound_season, report_type, indexes
+        )
+        fetched_data = self.collection.aggregate(pipeline).to_list()
-            pipeline = [
-                {
-                    "$match": {
-                        "ticker": ticker
-                    }
-                }, {
-                    "$unwind": "$data"
-                }, {
-                    "$match": {
-                        "$or": [
-                            {
-                                "data.year": {
-                                    "$gt": start_year,
-                                    "$lt": end_year
-                                }
-                            }, {
-                                "data.year": start_year,
-                                "data.season": {
-                                    "$gte": start_season
-                                }
-                            }, {
-                                "data.year": end_year,
-                                "data.season": {
-                                    "$lte": end_season
-                                }
-                            }, {
-                                "data.year": lower_bound_year,
-                                "data.season": lower_bound_season
-                            }
-                        ]
-                    }
-                }, {
-                    "$project": project_stage
-                }
-            ]
+        data_dict = self.transform_value(
+            StatsProcessor.list_of_dict_to_dict(
+                data_list=fetched_data,
+                keys=["year", "season"],
+                delimeter="Q",
+                data_key=report_type
+            )
+        )
-        fetched_data = self.collection.aggregate(pipeline).to_list()
-        data_dict = StatsProcessor.list_of_dict_to_dict(
-            fetched_data,
-            keys=["year", "season"],
-            delimeter="Q",
-            data_key=report_type)
-        data_dict = self.transform_value(data_dict)
-        if (use_cal):
-            data_with_QoQ = self.cal_QoQ(data_dict)
-            data_df = pd.DataFrame.from_dict(data_with_QoQ)
-            data_df = data_df.iloc[:, 1:]
-            data_df = data_df.iloc[:, ::-1].T
-            data_dict = data_df.to_dict()
-            data_dict = self.get_dict_of_df(data_dict)
-            return data_dict
-        else:
-            data_df = pd.DataFrame.from_dict(data_dict)
-            data_df = data_df.iloc[:, ::-1]
-            return data_df
+        return self.calculate_and_format(data_dict, use_cal, self.cal_QoQ)
     def get_YoY_data(
-            self,
-            ticker,
-            start_year,
-            end_year,
-            season,
-            report_type="Q",
-            indexes=[],
-            use_cal=False):
+        self,
+        ticker,
+        start_year,
+        end_year,
+        season,
+        report_type="Q",
+        indexes=[],
+        use_cal=False
+    ):
         """
         取得某季歷年資料
         """
-        if (use_cal):
-            select_year = set()
-            for year in range(start_year, end_year + 1):
-                year_shifts = {year, year - 1, year - 3, year - 5, year - 10}
-                select_year = select_year.union(year_shifts)
-            select_year = sorted(list(select_year), reverse=True)
-        else:
-            select_year = [year for year in range(start_year, end_year + 1)]
-        if (not indexes):    # 沒有指定 -> 取全部
-            pipeline = [
-                {
-                    "$match": {
-                        "ticker": ticker
-                    }
-                }, {
-                    "$unwind": "$data"
-                }, {
-                    "$match": {
-                        "$or": [
-                            {
-                                "$and": [
-                                    {
-                                        "data.year": {
-                                            "$in": select_year
-                                        }
-                                    }, {
-                                        "data.season": {
-                                            "$eq": season
-                                        }
-                                    }
-                                ]
-                            },
-                        ]
-                    }
-                }, {
-                    "$project": {
-                        "data.year": 1,
-                        "data.season": 1,
-                        f"data.{report_type}": 1,
-                        "_id": 0
-                    }
-                }
-            ]
-        else:    # 取指定index
-            project_stage = {"data.year": 1, "data.season": 1}
-            for index in indexes:
-                project_stage[f"data.{report_type}.{index}"] = 1
-            pipeline = [
-                {
-                    "$match": {
-                        "ticker": ticker
-                    }
-                }, {
-                    "$unwind": "$data"
-                }, {
-                    "$match": {
-                        "$and": [
-                            {
-                                "data.year": {
-                                    "$in": select_year
-                                }
-                            }, {
-                                "data.season": {
-                                    "$eq": season
-                                }
-                            }
-                        ]
-                    }
-                }, {
-                    "$project": project_stage
-                }
-            ]
+        select_year = sorted(
+            {year
+             for year in range(start_year, end_year + 1)} | {
+                 y
+                 for year in range(start_year, end_year + 1)
+                 for y in {year, year - 1, year - 3, year - 5, year - 10}
+             }
+        ) if use_cal else list(range(start_year, end_year + 1))
+        pipeline = self.build_pipeline(
+            ticker,
+            select_year,
+            season,
+            None,
+            None,
+            None,
+            None,
+            report_type,
+            indexes,
+            year_based=True
+        )
         fetched_data = self.collection.aggregate(pipeline).to_list()
+        data_dict = self.transform_value(
+            StatsProcessor.list_of_dict_to_dict(
+                fetched_data, ["year", "season"], "Q", report_type
+            )
+        )
+        return self.calculate_and_format(
+            data_dict, use_cal,
+            lambda x: self.cal_YoY(x, start_year, end_year, season)
+        )
-        # 處理計算YoY
-        data_dict = StatsProcessor.list_of_dict_to_dict(
-            fetched_data,
-            keys=['year', 'season'],
-            data_key=report_type,
-            delimeter='Q')
-        data_dict = self.transform_value(data_dict)
-        if (use_cal):
-            data_with_YoY = self.cal_YoY(
-                data_dict, start_year, end_year, season)
-            data_df = pd.DataFrame.from_dict(data_with_YoY)
-            data_df = data_df.iloc[:, ::-1].T
-            data_dict = data_df.to_dict()
-            data_dict = self.get_dict_of_df(data_dict)
-            return data_dict
-        else:
-            data_df = pd.DataFrame.from_dict(data_dict)
-            data_df = data_df.iloc[:, ::-1]
-            return data_df
     def transform_value(self, data_dict):
         """
         處理千元, %等單位
         """
         data_df = pd.DataFrame.from_dict(data_dict)
-        process_set = set(data_df.index).intersection(set(self.thousand_index_list))
-        process_list = list(process_set)
-        data_df.loc[process_list] = data_df.loc[process_list].map(
-            lambda x : StatsProcessor.cal_non_percentage(x, postfix="千元")
-        )
+        for category, postfix in [(self.thousand_index_list, "千元"),
+                                  (self.percent_index_list, "%")]:
+            process_list = list(set(data_df.index) & set(category))
+            if postfix == "%":
+                data_df.loc[process_list] = data_df.loc[process_list].astype(
+                    str
+                ).map(lambda x: f"{x}%")
+            else:
+                data_df.loc[process_list] = data_df.loc[process_list].map(
+                    lambda x: StatsProcessor.
+                    cal_non_percentage(x, postfix=postfix)
+                )
+        return data_df.to_dict()
+    def build_pipeline(
+        self,
+        ticker,
+        start_year,
+        start_season,
+        end_year,
+        end_season,
+        lower_bound_year,
+        lower_bound_season,
+        report_type,
+        indexes,
+        year_based=False
+    ):
+        project_stage = {
+            "_id": 0,
+            "data.year": 1,
+            "data.season": 1,
+            **{
+                f"data.{report_type}.{idx}": 1
+                for idx in indexes
+            }
+        } if indexes else {
+            "_id": 0,
+            "data.year": 1,
+            "data.season": 1,
+            f"data.{report_type}": 1
+        }
+        if (year_based):
+            match_stage = {
+                "data.year": {
+                    "$in": start_year
+                } if year_based else {
+                    "$gt": start_year,
+                    "$lt": end_year
+                },
+                "data.season": end_season
+            }
+        else:
+            match_stage = {
+                "$or": [
+                    {
+                        "data.year": {
+                            "$gt": start_year,
+                            "$lt": end_year
+                        }
+                    }, {
+                        "data.year": start_year,
+                        "data.season": {
+                            "$gte": start_season
+                        }
+                    }, {
+                        "data.year": end_year,
+                        "data.season": {
+                            "$lte": end_season
+                        }
+                    }, {
+                        "data.year": lower_bound_year,
+                        "data.season": lower_bound_season
+                    }
+                ]
+            }
-        process_set = set(data_df.index).intersection(set(self.percent_index_list))
-        process_list = list(process_set)
-        data_df.loc[process_list] = data_df.loc[process_list].map(
-            lambda x : f"{x}%"
+        return [
+            {
+                "$match": {
+                    "ticker": ticker
+                }
+            }, {
+                "$unwind": "$data"
+            }, {
+                "$match": match_stage
+            }, {
+                "$project": project_stage
+            }
+        ]
+    def calculate_and_format(self, data_dict, use_cal, calc_function):
+        data_df = pd.DataFrame.from_dict(
+            calc_function(data_dict) if use_cal else data_dict
+        ).iloc[:, ::-1]
+        return data_df if not use_cal else self.get_dict_of_df(
+            data_df.T.to_dict()
         )
-        data_dict = data_df.to_dict()
-        return data_dict
 class TEJStockPriceFetcher(BaseTEJFetcher):
     def __init__(
-            self,
-            mongo_uri,
-            db_name: str = "company",
-            collection_name: str = None):
+        self, mongo_uri, db_name: str = "company", collection_name: str = None
+    ):
         self.mongo_uri = mongo_uri
         self.db_name = db_name
         self.collection_name = collection_name
@@ -411,14 +304,16 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
         self.db = self.client[self.db_name]
         self.collection = self.db[self.collection_name]
-        self.check_period = ['1d', '7d', '1m', '3m', '1y', '3y', '5y', '10y', 'all']
+        self.check_period = [
+            '1d', '7d', '1m', '3m', '1y', '3y', '5y', '10y', 'all'
+        ]
     def get(
-            self,
-            ticker: str = "2330",
-            start_date: str = "2024-10-01",
-            period: str = None
-        ):
+        self,
+        ticker: str = "2330",
+        start_date: str = "2024-10-01",
+        period: str = None
+    ):
         """
         取得開高低收資料
         start_date: str: 起始的日期
@@ -463,4 +358,4 @@ class TEJStockPriceFetcher(BaseTEJFetcher):
         data_df = pd.DataFrame(elements).set_index('mdate')
-        return data_df
+        return data_df

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23b0__py3-none-any.whl

neurostats-API 0.0.21b0__py3-none-any.whl → 0.0.23b0__py3-none-any.whl