PyPI - neurostats-API - Versions diffs - 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl - Mend

neurostats-API 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

neurostats_API/__init__.py +1 -1
neurostats_API/fetchers/balance_sheet.py +64 -40
neurostats_API/fetchers/base.py +6 -2
neurostats_API/fetchers/cash_flow.py +91 -70
neurostats_API/fetchers/finance_overview.py +26 -26
neurostats_API/fetchers/institution.py +80 -7
neurostats_API/fetchers/macro_daily_event.py +8 -0
neurostats_API/fetchers/margin_trading.py +0 -3
neurostats_API/fetchers/month_revenue.py +139 -105
neurostats_API/fetchers/profit_lose.py +103 -83
neurostats_API/fetchers/tech.py +0 -58
neurostats_API/fetchers/tej_finance_report.py +101 -69
neurostats_API/fetchers/value_invest.py +26 -9
neurostats_API/utils/calculate_value.py +5 -2
neurostats_API/utils/data_process.py +12 -6
neurostats_API/utils/logger.py +21 -0
{neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/METADATA +2 -2
neurostats_API-0.0.24.post1.dist-info/RECORD +36 -0
neurostats_API-0.0.23b2.dist-info/RECORD +0 -34
{neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/WHEEL +0 -0
{neurostats_API-0.0.23b2.dist-info → neurostats_API-0.0.24.post1.dist-info}/top_level.txt +0 -0

neurostats_API/fetchers/institution.py CHANGED Viewed

@@ -19,6 +19,7 @@ class InstitutionFetcher(StatsFetcher):
     def __init__(self, ticker, db_client):  # Runs the parent initializer, then binds the TEJ collection handle.
         super().__init__(ticker, db_client)
+        self.tej_collection = self.db['TWN/APISHRACT'] # TEJ fallback collection (read by collect_tej)
     def prepare_query(self, start_date, end_date):
         pipeline = super().prepare_query()
@@ -107,9 +108,6 @@ class InstitutionFetcher(StatsFetcher):
                 hour=0, minute=0, second=0, microsecond=0
             )
         except Exception as e:
-            print(
-                f"No updated time for institution_trading in {self.ticker}, use current time instead"
-            )
             end_date = datetime.now(self.timezone)
             end_date = end_date.replace(
                 hour=0, minute=0, second=0, microsecond=0
@@ -123,14 +121,22 @@ class InstitutionFetcher(StatsFetcher):
         fetched_data = self.collect_data(start_date, end_date)
         fetched_data['daily_data'] = sorted(
-            fetched_data['daily_data'], key=lambda x: x['date'], reverse=True
+            fetched_data.get("daily_data", []), key=lambda x: x['date'], reverse=True
         )
+        if (not fetched_data['institution_trading']):
+            #  找 TEJ 備援
+            fetched_data.update(
+                {
+                    'institution_trading': self.collect_tej(start_date, end_date)
+                }
+            )
         fetched_data['institution_trading'] = sorted(
-            fetched_data['institution_trading'],
+            fetched_data.get('institution_trading', []),
             key=lambda x: x['date'],
             reverse=True
-        ) if (fetched_data['institution_trading']) else []
+        ) if fetched_data['institution_trading'] else None
         table_dict = self.process_data(fetched_data)
@@ -227,7 +233,6 @@ class InstitutionFetcher(StatsFetcher):
         table_dict['annual_trading'] = self.process_annual_trading(
             annual_dates, annual_trading_skip
         )
         return table_dict
@@ -343,3 +348,71 @@ class InstitutionFetcher(StatsFetcher):
                 "percentage": 0
             },
         }
+    def collect_tej(self, start_date, end_date):  # Fallback fetch: aggregate on the TEJ collection and return a list of row dicts keyed by the labels in proj_columns.
+        pipeline = self.prepare_tej_query(start_date, end_date)
+        result = self.tej_collection.aggregate(pipeline)
+        result = result.to_list()
+        result_df = pd.DataFrame(result[0]['filtered_data'])  # NOTE(review): result[0] raises IndexError when the aggregation returns no document.
+        proj_columns = {  # source column -> output column label; the dict order also fixes the output column order
+            "mdate": "date",
+            "qfii_buy": "外資買進股數",
+            "qfii_sell": "外資賣出股數",
+            "qfii_ex": "外資買賣超股數",
+            "fund_buy" : "投信買進股數",
+            "fund_sell" : "投信賣出股數",
+            "fund_ex" : "投信買賣超股數",
+            "dlrp_buy" : "自營商買賣超股數(自行)",  # NOTE(review): the dlrp_* labels look rotated versus the other groups (buy/sell/net): here buy gets the net label,
+            "dlrp_sell" : "自營商買進股數(自行)",  #   sell gets the bought label,
+            "dlrp_ex" : "自營商賣出股數(自行)",  #   and ex gets the sold label -- confirm against the TEJ field definitions.
+            "dlrh_buy" : "自營商買進股數(避險)",
+            "dlrh_sell" : "自營商賣出股數(避險)",
+            "dlrh_ex" : "自營買賣超股數(避險)",
+            "tot_ex" : "三大法人買賣超股數"
+        }
+        target_index = list(proj_columns.keys())[1:]  # every mapped column except the first key ("mdate")
+        result_df.loc[:, target_index] = result_df.loc[:, target_index].map(lambda x : 1000 * x) # TEJ reports in units of a thousand shares, so scale by 1000
+        result_df = result_df.loc[:, list(proj_columns.keys())]  # keep only the mapped columns, in mapping order
+        result_df = result_df.reindex(columns = list(proj_columns.keys()), fill_value = None)  # NOTE(review): same column list as the line above, so this adds no columns here.
+        result_df = result_df.rename(columns = proj_columns)
+        return result_df.to_dict(orient = 'records')
+    def prepare_tej_query(self, start_date, end_date):  # Build the aggregation pipeline used by collect_tej.
+        return [
+            {
+                "$match": {  # stage 1: select documents for this fetcher's ticker
+                    "ticker": { "$eq": self.ticker }
+                }
+            },
+            {
+                "$project": {  # stage 2: emit ticker plus the date-filtered items as "filtered_data"
+                    "_id": 0,
+                    "ticker": 1,
+                    "filtered_data": {
+                        "$map": {  # NOTE(review): "in" returns each item unchanged, so this $map is an identity over the $filter output.
+                            "input": {
+                                "$filter": {
+                                    "input": "$data",
+                                    "as": "item",
+                                    "cond": {  # keep items with start_date <= mdate <= end_date (both bounds inclusive)
+                                        "$and": [
+                                            { "$gte": ["$$item.mdate", start_date] },
+                                            { "$lte": ["$$item.mdate", end_date] }
+                                        ]
+                                    }
+                                }
+                            },
+                            "as": "filtered_item",
+                            "in": "$$filtered_item"
+                        }
+                    }
+                }
+            }
+        ]

neurostats_API/fetchers/macro_daily_event.py ADDED Viewed

@@ -0,0 +1,8 @@
+from .base import StatsFetcher
+from datetime import datetime, timedelta, date
+import json
+import numpy as np
+import pandas as pd
+from ..utils import StatsDateTime, StatsProcessor
+import importlib.resources as pkg_resources
+import yaml

neurostats_API/fetchers/margin_trading.py CHANGED Viewed

@@ -121,9 +121,6 @@ class MarginTradingFetcher(StatsFetcher):
                 hour=0, minute=0, second=0, microsecond=0
             )
         except Exception as e:
-            print(
-                f"No updated time for institution_trading in {self.ticker}, use current time instead"
-            )
             end_date = datetime.now(self.timezone)
             end_date = end_date.replace(
                 hour=0, minute=0, second=0, microsecond=0

neurostats_API/fetchers/month_revenue.py CHANGED Viewed

@@ -4,6 +4,8 @@ import pandas as pd
 from ..utils import StatsDateTime, StatsProcessor, YoY_Calculator
 import importlib.resources as pkg_resources
 import yaml
+import traceback
+import logging
 class MonthRevenueFetcher(StatsFetcher):
@@ -11,151 +13,183 @@ class MonthRevenueFetcher(StatsFetcher):
         iFa.ai: 財務分析 -> 每月營收
     """
-    def __init__(self, ticker, db_client):
+    def __init__(self, ticker, db_client, logger = None):  # New optional `logger` argument; existing two-argument calls keep working.
         super().__init__(ticker, db_client)
+        self.logger = logger or logging.getLogger(__name__)  # falls back to this module's logger when none (or a falsy value) is passed
-    def prepare_query(self, target_year, target_month):
+    def _prepare_query(self, target_year, target_month):  # Renamed from prepare_query. NOTE(review): target_year/target_month are not used in the visible body.
         pipeline = super().prepare_query()  # start from the parent's pipeline and append one $project stage
-        pipeline.append({
-            "$project": {
-                "_id": 0,
-                "ticker": 1,
-                "company_name": 1,
-                "monthly_data": {
-                    "$sortArray": {
-                        "input": "$monthly_data",
-                        "sortBy": {
-                            "year": -1,
-                            "month": -1
+        pipeline.append(
+            {
+                "$project": {
+                    "_id": 0,
+                    "ticker": 1,
+                    "company_name": 1,
+                    "monthly_data": {
+                        "$sortArray": {  # sort monthly_data by year, then month, both descending
+                            "input": "$monthly_data",
+                            "sortBy": {
+                                "year": -1,
+                                "month": -1
+                            }
                         }
-                    }
-                },
+                    },
+                }
             }
-        })
+        )
         return pipeline
     def collect_data(self, target_year, target_month):  # Runs the aggregation; the new version returns the whole result list.
-        pipeline = self.prepare_query(target_year, target_month)
+        pipeline = self._prepare_query(target_year, target_month)
         fetched_data = self.collection.aggregate(pipeline)
+        fetched_data = fetched_data.to_list()
-        fetched_data = list(fetched_data)
-        return fetched_data[-1]
+        return fetched_data  # callers now pick the document themselves (query_data uses [-1])
     def query_data(self):  # Entry point: resolve the target year/month, fetch, process; on any processing error return the empty structure.
-        try:
-            latest_time = StatsDateTime.get_latest_time(
-                self.ticker, self.collection)['last_update_time']
-            target_year = latest_time['monthly_data']['latest_year']
-            target_month = latest_time['monthly_data']['latest_month']
-        except Exception as e:
-            today = StatsDateTime.get_today()
-            target_month = today.month
-            target_year = today.year
+        target_year, target_month = self._get_target_year_and_month()
         # Query data
         fetched_data = self.collect_data(target_year, target_month)
-        return self.process_data(fetched_data)
+        try:
+            return self._process_data(fetched_data[-1])  # an empty result list raises IndexError here, which the except below also catches
+        except Exception:
+            recent_date = []  # NOTE(review): filled by the loop below but never read afterwards.
+            for _ in range(12):
+                recent_date.append(f"{target_year}/{target_month}")
+                target_year, target_month = (
+                    target_year - 1, 12
+                ) if target_month == 1 else (target_year, target_month - 1)
+            # logging.warning(f"{self.ticker}: No monthly revenue data in TWSE mongoDB", exc_info=True)
+            return self._get_empty_structure(target_year, target_month)  # NOTE(review): the loop above has already moved the date back 12 months -- confirm this shift is intended.
+    def _process_data(self, fetched_data):  # Renamed from process_data. Formats monthly values, builds four tables, stores them on fetched_data, and drops "monthly_data".
-    def process_data(self, fetched_data):
+        monthly_data = fetched_data.get('monthly_data', [])
+        if not monthly_data:
+            raise ValueError("monthly_data is empty or missing")  # caught by the broad except in query_data
-        monthly_data = fetched_data['monthly_data']
         for data in monthly_data:
             for key, value in data.items():  # values are rewritten in place, chosen by substring of the key name
-                if ("YoY" in key):
+                if "YoY" in key:
                     data[key] = StatsProcessor.cal_percentage(value)
-                elif ("ratio" in key or 'percentage' in key):
-                    data[key] = StatsProcessor.cal_non_percentage(value,
-                                                                  to_str=True,
-                                                                  postfix="%")
-                elif (key not in ('year', 'month')):
-                    data[key] = StatsProcessor.cal_non_percentage(value,
-                                                                  postfix="千元")
-        target_month = monthly_data[0]['month']
-        monthly_df = pd.DataFrame(monthly_data)
+                elif "ratio" in key or "percentage" in key:
+                    data[key] = StatsProcessor.cal_non_percentage(value, to_str=True, postfix="%")
+                elif key not in ('year', 'month'):
+                    data[key] = StatsProcessor.cal_non_percentage(value, postfix="千元")
+        monthly_df = pd.DataFrame(monthly_data)
+        target_month = monthly_data[0]['month']  # month of the first entry; presumably the newest, given the descending sort in _prepare_query
         target_month_df = monthly_df[monthly_df['month'] == target_month]
         annual_month_df = monthly_df[monthly_df['month'] == 12]  # December rows only
-        month_revenue_df = monthly_df.pivot(index='month',
-                                            columns='year',
-                                            values='revenue')
-        grand_total_df = target_month_df.pivot(index='month',
-                                               columns='year',
-                                               values='grand_total')
-        annual_total_df = annual_month_df.pivot(index='month',
-                                               columns='year',
-                                               values='grand_total')
-        grand_total_df.rename(index={target_month: f"grand_total"},
-                              inplace=True)
+        month_revenue_df = monthly_df.pivot(
+            index='month', columns='year', values='revenue'
+        )
         month_revenue_df = month_revenue_df.sort_index(ascending=False)
-        month_revenue_df = pd.concat([grand_total_df, month_revenue_df],
-                                     axis=0)
-        fetched_data['month_revenue'] = month_revenue_df[sorted(
-            month_revenue_df.columns, reverse=True)]
-        # This month's revenue across years
-        fetched_data[
-            'this_month_revenue_over_years'] = target_month_df.set_index(
-                "year")[[
-                    "revenue", "revenue_increment_ratio", "YoY_1", "YoY_3",
-                    "YoY_5", "YoY_10"
-                ]].T
-        # Revenue growth across years
-        fetched_data['grand_total_over_years'] = target_month_df.set_index(
-            "year")[[
-                "grand_total", "grand_total_increment_ratio",
-                "grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5",
-                "grand_total_YoY_10"
-            ]].T
-        fetched_data.pop("monthly_data")
+        grand_total_df = target_month_df.pivot(
+            index='month', columns='year', values='grand_total'
+        )
+        grand_total_df.rename(
+            index={target_month: f"grand_total"}, inplace=True
+        )
+        month_revenue_df = pd.concat([grand_total_df, month_revenue_df], axis=0)  # the "grand_total" row goes on top of the month rows
-        fetched_data['recent_month_revenue'] = self.get_recent_revenue_grwoth(
-            monthly_data, grand_total_dict=annual_total_df.to_dict(), interval = 12
+        annual_total_df = annual_month_df.pivot(
+            index='month', columns='year', values='grand_total'
+        )
+        fetched_data.update(
+            {
+                "month_revenue": month_revenue_df[sorted(month_revenue_df.columns, reverse=True)],
+                "this_month_revenue_over_years": target_month_df.set_index("year")[[
+                    "revenue", "revenue_increment_ratio", "YoY_1", "YoY_3", "YoY_5", "YoY_10"
+                ]].T,
+                "grand_total_over_years": target_month_df.set_index("year")[[
+                    "grand_total", "grand_total_increment_ratio", "grand_total_YoY_1", "grand_total_YoY_3", "grand_total_YoY_5", "grand_total_YoY_10"
+                ]].T,
+                "recent_month_revenue": self._get_recent_growth(monthly_data, grand_total_dict=annual_total_df.to_dict(), interval=12)
+            }
         )
+        fetched_data.pop("monthly_data")
         return fetched_data
-    def get_recent_revenue_grwoth(self, monthly_data, grand_total_dict, interval: int = 12):
-        recent_month_data = monthly_data[:interval + 1]
+    def _get_recent_growth(self, monthly_data, grand_total_dict, interval=12):  # Renamed from get_recent_revenue_grwoth. Returns a transposed DataFrame with one column per date.
+        last_month_data = monthly_data[1:interval + 1] + [{}] * max(0, interval - len(monthly_data) + 1)  # entries shifted by one; padded with {} so short histories still pair up in the zip below
         MoMs = [
-            YoY_Calculator.cal_growth(this_value['revenue'], last_value['revenue'], delta = 1)
-            for this_value, last_value in zip(
-                recent_month_data[:12], recent_month_data[1:13]
-            )
+            YoY_Calculator.cal_growth(this.get('revenue'), last.get('revenue'), delta = 1)
+            for this, last in zip(monthly_data[:interval], last_month_data[:interval])
         ]
-        recent_month_data = {
-            "date" : [f"{data['year']}/{data['month']}" for data in recent_month_data[:interval]],
-            "revenue" : [data['revenue'] for data in recent_month_data[:interval]],
-            "MoM" : [f"{(data * 100):.2f}%" for data in MoMs],
-            "YoY" : [f"{data['revenue_increment_ratio']}" for data in recent_month_data[:interval]],
-            "total_YoY":  [f"{data['grand_total_increment_ratio']}" for data in recent_month_data[:interval]],
-        }
-        # accum_YoY
-        # accum_YoY follows the definition proposed by Davis
-        # accum_YoY for 2024/6 = revenue accumulated through June 2024 / total revenue of 2023
-        accum_YoYs = []
-        for data in monthly_data[:interval]:
+        def safe_accum_yoy(data):  # returns a percent string, or None when any lookup or arithmetic step fails
             try:
                 year = data['year'] - 1
                 total = grand_total_dict[year][12]  # previous year's December grand_total
-                accum_YoY = round(((data['grand_total'] - total) / total) * 100, 2)
-                accum_YoYs.append(f"{accum_YoY}%")
-            except Exception as e:
-                accum_YoYs.append(None)
-        recent_month_data['accum_YoY'] = accum_YoYs
+                grand_total = data.get('grand_total')
+                return f"{round(((grand_total - total) / total) * 100, 2)}%"
+            except Exception:
+                self.logger.debug(f"accum_YoY calc failed for year={data.get('year')} / ticker={self.ticker}", exc_info=True)
+                return None
-        recent_month_df = pd.DataFrame(recent_month_data).set_index('date').T
+        recent_month_data = {
+            "date": [f"{d.get('year', 0)}/{d.get('month', 0)}" for d in monthly_data[:interval]],
+            "revenue": [d.get('revenue') for d in monthly_data[:interval]],
+            "MoM": [f"{(m * 100):.2f}%" if isinstance(m, float) else None for m in MoMs],  # only float results are formatted; anything else becomes None
+            "YoY": [d.get('revenue_increment_ratio') for d in monthly_data[:interval]],
+            "total_YoY": [d.get('grand_total_increment_ratio') for d in monthly_data[:interval]],
+            # accum_YoY
+            # accum_YoY follows the definition proposed by Davis
+            # accum_YoY for 2024/6 = revenue accumulated through June 2024 / total revenue of 2023
+            "accum_YoY": [safe_accum_yoy(d) for d in monthly_data[:interval]]
+        }
-        return recent_month_df
+        df = pd.DataFrame(recent_month_data)
+        return df[df['date'] != "0/0"].set_index('date').T  # drop rows whose entry had neither year nor month ("0/0")
+    def _get_empty_structure(self, target_year, target_month):  # Builds four empty DataFrames under the same keys that _process_data adds.
+        """
+        Returned when an exception occurs
+        """
+        recent_date = [f"{target_year}/{target_month}"]  # NOTE(review): recent_date is built but not used in the returned dict.
+        for _ in range(11):
+            target_year, target_month = (target_year - 1, 12) if target_month == 1 else (target_year, target_month - 1)
+            recent_date.append(f"{target_year}/{target_month}")
+        def empty_df(index, columns):  # a DataFrame with the given labels and no data
+            return pd.DataFrame(index=index, columns=columns)
+        return {  # NOTE(review): target_year was stepped back 11 months by the loop above before it is used for the year columns.
+            "month_revenue": empty_df(
+                index=pd.Index(['grand_total', 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], dtype='object', name='month'),
+                columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype=object, name='year')
+            ),
+            "this_month_revenue_over_years": empty_df(
+                index=pd.Index(['revenue', 'revenue_increment_ratio', 'YoY_1', 'YoY_3', 'YoY_5', 'YoY_10'], dtype='object'),
+                columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')  # NOTE(review): string labels with dtype='int64', while month_revenue uses dtype=object -- confirm.
+            ),
+            "grand_total_over_years": empty_df(
+                index=pd.Index(['grand_total', 'grand_total_increment_ratio', 'grand_total_YoY_1', 'grand_total_YoY_3', 'grand_total_YoY_5', 'grand_total_YoY_10'], dtype='object'),
+                columns=pd.Index([f"{target_year - i}" for i in range(10)], dtype='int64', name='year')
+            ),
+            "recent_month_revenue": empty_df(
+                index=pd.Index(['revenue', 'MoM', 'YoY', 'total_YoY', 'accum_YoY'], dtype='object'),
+                columns=pd.Index([], dtype = 'object', name = 'date')  # no date columns at all in the empty case
+            )
+        }
+    def _get_target_year_and_month(self):  # Returns (year, month): the latest stored monthly_data period, or today's year/month if that lookup fails.
+        try:
+            latest_time = StatsDateTime.get_latest_time(self.ticker, self.collection)['last_update_time']
+            return latest_time['monthly_data']['latest_year'], latest_time['monthly_data']['latest_month']
+        except Exception:  # any failure (lookup error, missing key, ...) falls back to the current date
+            today = StatsDateTime.get_today()
+            return today.year, today.month

neurostats-API 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl

neurostats-API 0.0.23b2__py3-none-any.whl → 0.0.24.post1__py3-none-any.whl