imsciences 0.9.5.4__py3-none-any.whl → 0.9.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


imsciences/pull.py CHANGED
@@ -11,6 +11,8 @@ from bs4 import BeautifulSoup
11
11
  import yfinance as yf
12
12
  import holidays
13
13
  from dateutil.easter import easter
14
+ import urllib.request
15
+ from geopy.geocoders import Nominatim
14
16
 
15
17
  from imsciences.mmm import dataprocessing
16
18
 
@@ -48,8 +50,8 @@ class datapull:
48
50
 
49
51
  print("\n6. pull_weather")
50
52
  print(" - Description: Fetch and process historical weather data for the specified country.")
51
- print(" - Usage: pull_weather(week_commencing, country)")
52
- print(" - Example: pull_weather('mon', 'GBR')")
53
+ print(" - Usage: pull_weather(week_commencing, start_date, country)")
54
+ print(" - Example: pull_weather('mon', '2020-01-01', ['GBR'])")
53
55
 
54
56
  print("\n7. pull_macro_ons_uk")
55
57
  print(" - Description: Fetch and process time series data from the Beta ONS API.")
@@ -60,6 +62,11 @@ class datapull:
60
62
  print(" - Description: Fetch and process time series data from the Beta ONS API.")
61
63
  print(" - Usage: pull_yfinance(tickers, week_start_day)")
62
64
  print(" - Example: pull_yfinance(['^FTMC', '^IXIC'], 'mon')")
65
+
66
+ print("\n9. pull_sports_events")
67
+ print(" - Description: Pull a veriety of sports events primaraly football and rugby.")
68
+ print(" - Usage: pull_sports_events(start_date, week_commencing)")
69
+ print(" - Example: pull_sports_events('2020-01-01', 'mon')")
63
70
 
64
71
  ############################################################### MACRO ##########################################################################
65
72
 
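For orientation, a minimal usage sketch of the two methods whose help text is updated above (assuming datapull is imported from imsciences.pull and can be instantiated with no arguments, which this diff does not show; output columns carry the seas_ prefixes described later in the file):

    from imsciences.pull import datapull

    dp = datapull()
    # Weekly GB weather from 2020 onwards, weeks commencing Monday
    weather_df = dp.pull_weather("mon", "2020-01-01", ["GB"])
    # Weekly 0/1 dummies for major football, rugby and NFL events
    events_df = dp.pull_sports_events("2020-01-01", "mon")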
@@ -507,15 +514,6 @@ class datapull:
507
514
  fathers_day = nth_weekday_of_month(yr, 6, 6, 3) # Sunday=6
508
515
  # Mother's Day US = 2nd Sunday in May
509
516
  mothers_day_us = nth_weekday_of_month(yr, 5, 6, 2)
510
- # Mother's Day UK: 4th Sunday in Lent => "Mothering Sunday"
511
- # We can approximate as: Easter Sunday - 21 days
512
- # BUT we also must ensure it's actually Sunday
513
- # (the 4th Sunday in Lent can shift. We'll do the official approach below.)
514
- # Another approach: Easter Sunday - 7 * (4 weeks) is the 4th Sunday prior to Easter.
515
- # But that might overshoot if Lent started mid-week.
516
- # Let's do a quick approach:
517
- # Officially: Mothering Sunday = 3 weeks before Easter Sunday (the 4th Sunday is Easter Sunday itself).
518
- # So Easter - 21 days should be the Sunday, but let's confirm with weekday check.
519
517
  mothering_sunday = easter(yr) - timedelta(days=21)
520
518
  # If for some reason that's not a Sunday (rare corner cases), shift to Sunday:
521
519
  while mothering_sunday.weekday() != 6: # Sunday=6
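The comment block removed above derived the rule that survives in the code: Mothering Sunday is taken as Easter Sunday minus 21 days, shifted back to a Sunday if needed. A quick standalone check for a recent year, using the same dateutil.easter helper the module already imports:

    from datetime import timedelta
    from dateutil.easter import easter

    # Easter Sunday 2024 fell on 31 March; 21 days earlier is 10 March 2024,
    # a Sunday, which was indeed Mothering Sunday in the UK that year.
    print(easter(2024) - timedelta(days=21))  # 2024-03-10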
@@ -641,610 +639,536 @@ class datapull:
641
639
 
642
640
  return df_combined
643
641
 
644
-
645
- def pull_weather(self, week_commencing, country) -> pd.DataFrame:
646
- import pandas as pd
647
- import urllib.request # noqa: F811
648
- from datetime import datetime
649
- import requests
650
- from geopy.geocoders import Nominatim # noqa: F811
651
-
652
- # Week commencing dictionary
642
+ def pull_weather(self, week_commencing, start_date, country_codes) -> pd.DataFrame:
643
+ """
644
+ Pull weather data for a given week-commencing day and one or more country codes.
645
+
646
+ LOGIC:
647
+ 1) For non-US countries (AU, GB, DE, CA, ZA):
648
+ - Mesonet => max_temp_f, min_temp_f -> compute mean_temp_f -> weekly average => 'avg_max_temp_f', etc.
649
+ - Open-Meteo => precipitation_sum => 'avg_rain_sum', snowfall_sum => 'avg_snow_sum'.
650
+ - Merge, then rename columns with prefix 'seas_{country}_'.
651
+
652
+ 2) For the US:
653
+ - We have multiple <STATE>_ASOS networks (e.g. CA_ASOS, TX_ASOS).
654
+ - For each state, fetch from Mesonet => max_temp_f, min_temp_f, precip_in, snow_in -> compute mean_temp_f -> weekly average => 'avg_max_temp_f', 'avg_rain_sum', 'avg_snow_sum', etc.
655
+ - Rename columns for each state with prefix 'seas_us_{state}_'.
656
+ - Merge all states (and countries) into a single DataFrame.
657
+
658
+ :param week_commencing: A string in {"mon","tue","wed","thur","fri","sat","sun"}.
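+ :param start_date: Date string "YYYY-MM-DD"; Mesonet requests start on this date, Open-Meteo requests start on 1 January of the same year.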
659
+ :param country_codes: A list of 2-letter country codes or a single string, e.g. ["GB","US"].
660
+ :return: A single Pandas DataFrame with weekly-aggregated data for all requested countries.
661
+ """
662
+ # ------------------------------------------------------------------ #
663
+ # 0) Handle either a single code or list of codes
664
+ # ------------------------------------------------------------------ #
665
+ if isinstance(country_codes, str):
666
+ country_codes = [country_codes]
667
+ elif not isinstance(country_codes, (list, tuple)):
668
+ raise ValueError("country_codes must be a list/tuple or a single string.")
669
+
670
+ # --- Setup / Constants --- #
653
671
  day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
672
+ # Map each 2-letter code to a key
673
+ country_dict = {
674
+ "US": "US_STATES",
675
+ "CA": "Canada",
676
+ "AU": "AU__ASOS",
677
+ "GB": "GB__ASOS",
678
+ "DE": "DE__ASOS",
679
+ "ZA": "ZA__ASOS"
680
+ }
654
681
 
655
- # Country dictionary
656
- country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}
682
+ # Station-based countries for Mesonet
683
+ station_map = {
684
+ "GB__ASOS": [
685
+ "&stations=EGCC", "&stations=EGNM", "&stations=EGBB", "&stations=EGSH",
686
+ "&stations=EGFF", "&stations=EGHI", "&stations=EGLC", "&stations=EGHQ",
687
+ "&stations=EGAC", "&stations=EGPF", "&stations=EGGD", "&stations=EGPE",
688
+ "&stations=EGNT"
689
+ ],
690
+ "AU__ASOS": [
691
+ "&stations=YPDN", "&stations=YBCS", "&stations=YBBN", "&stations=YSSY",
692
+ "&stations=YSSY", "&stations=YMEN", "&stations=YPAD", "&stations=YPPH"
693
+ ],
694
+ "DE__ASOS": [
695
+ "&stations=EDDL", "&stations=EDDH", "&stations=EDDB", "&stations=EDDN",
696
+ "&stations=EDDF", "&stations=EDDK", "&stations=EDLW", "&stations=EDDM"
697
+ ],
698
+ # Example: if ZA is also station-based, add it here.
699
+ "ZA__ASOS": [
700
+ # If you know the station codes, add them here:
701
+ # e.g. "&stations=FACT", "&stations=FAJS", ...
702
+ ],
703
+ # "FR__ASOS" if you need France, etc.
704
+ }
657
705
 
658
- # Function to flatten a list of nested lists into a list
659
- def flatten_list(nested_list):
660
- return [item for sublist in nested_list for item in sublist]
706
+ # Non-US countries that also fetch RAIN & SNOW from Open-Meteo
707
+ rainfall_city_map = {
708
+ "GB__ASOS": [
709
+ "Manchester", "Leeds", "Birmingham", "London","Glasgow",
710
+ ],
711
+ "AU__ASOS": [
712
+ "Darwin", "Cairns", "Brisbane", "Sydney", "Melbourne", "Adelaide", "Perth"
713
+ ],
714
+ "DE__ASOS": [
715
+ "Dortmund", "Düsseldorf", "Frankfurt", "Munich", "Cologne", "Berlin", "Hamburg", "Nuernberg"
716
+ ],
717
+ "ZA__ASOS": [
718
+ "Johannesburg", "Cape Town", "Durban", "Pretoria"
719
+ ],
720
+ }
661
721
 
662
- # Choose country
663
- country = country_dict[country]
722
+ # Canada sub-networks
723
+ institute_vector = [
724
+ "CA_NB_ASOS", "CA_NF_ASOS", "CA_NT_ASOS", "CA_NS_ASOS", "CA_NU_ASOS"
725
+ ]
726
+ stations_list_canada = [
727
+ [
728
+ "&stations=CYQM", "&stations=CERM", "&stations=CZCR",
729
+ "&stations=CZBF", "&stations=CYFC", "&stations=CYCX"
730
+ ],
731
+ [
732
+ "&stations=CWZZ", "&stations=CYDP", "&stations=CYMH", "&stations=CYAY",
733
+ "&stations=CWDO", "&stations=CXTP", "&stations=CYJT", "&stations=CYYR",
734
+ "&stations=CZUM", "&stations=CYWK", "&stations=CYWK"
735
+ ],
736
+ [
737
+ "&stations=CYHI", "&stations=CZCP", "&stations=CWLI", "&stations=CWND",
738
+ "&stations=CXTV", "&stations=CYVL", "&stations=CYCO", "&stations=CXDE",
739
+ "&stations=CYWE", "&stations=CYLK", "&stations=CWID", "&stations=CYRF",
740
+ "&stations=CXYH", "&stations=CYWY", "&stations=CWMT"
741
+ ],
742
+ [
743
+ "&stations=CWEF", "&stations=CXIB", "&stations=CYQY", "&stations=CYPD",
744
+ "&stations=CXNP", "&stations=CXMY", "&stations=CYAW", "&stations=CWKG",
745
+ "&stations=CWVU", "&stations=CXLB", "&stations=CWSA", "&stations=CWRN"
746
+ ],
747
+ [
748
+ "&stations=CYLT", "&stations=CWEU", "&stations=CWGZ", "&stations=CYIO",
749
+ "&stations=CXSE", "&stations=CYCB", "&stations=CWIL", "&stations=CXWB",
750
+ "&stations=CYZS", "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"
751
+ ]
752
+ ]
664
753
 
665
- # Choose start and end dates
666
- start_day = 1
667
- start_month = 1
668
- start_year = 2014
669
- formatted_date = datetime(start_year, start_month, start_day).strftime("%Y-%m-%d")
754
+ # US states and stations - each sub-network
755
+ us_state_networks = {
756
+ state: f"{state}_ASOS" for state in [
757
+ "AL", "AR", "AZ", "CA", "CO", "CT", "DE", "FL", "GA", "IA", "ID", "IL", "IN",
758
+ "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND",
759
+ "NE", "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD",
760
+ "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY"
761
+ ]
762
+ }
763
+
764
+ us_stations_map = {
765
+ "AL_ASOS": ["&stations=BHM", "&stations=HSV", "&stations=MGM", "&stations=MOB", "&stations=TCL"],
766
+ "AR_ASOS": ["&stations=LIT", "&stations=FSM", "&stations=TXK", "&stations=HOT", "&stations=FYV"],
767
+ "AZ_ASOS": ["&stations=PHX", "&stations=TUS", "&stations=FLG", "&stations=YUM", "&stations=PRC"],
768
+ "CA_ASOS": ["&stations=LAX", "&stations=SAN", "&stations=SJC", "&stations=SFO", "&stations=FAT"],
769
+ "CO_ASOS": ["&stations=DEN", "&stations=COS", "&stations=GJT", "&stations=PUB", "&stations=ASE"],
770
+ "CT_ASOS": ["&stations=BDL", "&stations=HVN", "&stations=BDR", "&stations=GON", "&stations=HFD"],
771
+ "DE_ASOS": ["&stations=ILG", "&stations=GED", "&stations=DOV"],
772
+ "FL_ASOS": ["&stations=MIA", "&stations=TPA", "&stations=ORL", "&stations=JAX", "&stations=TLH"],
773
+ "GA_ASOS": ["&stations=ATL", "&stations=SAV", "&stations=CSG", "&stations=MCN", "&stations=AGS"],
774
+ "IA_ASOS": ["&stations=DSM", "&stations=CID", "&stations=DBQ", "&stations=ALO", "&stations=SUX"],
775
+ "ID_ASOS": ["&stations=BOI", "&stations=IDA", "&stations=PIH", "&stations=SUN", "&stations=COE"],
776
+ "IL_ASOS": ["&stations=ORD", "&stations=MDW", "&stations=PIA", "&stations=SPI", "&stations=MLI"],
777
+ "IN_ASOS": ["&stations=IND", "&stations=FWA", "&stations=SBN", "&stations=EVV", "&stations=HUF"],
778
+ "KS_ASOS": ["&stations=ICT", "&stations=FOE", "&stations=GCK", "&stations=HYS", "&stations=SLN"],
779
+ "KY_ASOS": ["&stations=SDF", "&stations=LEX", "&stations=CVG", "&stations=PAH", "&stations=BWG"],
780
+ "LA_ASOS": ["&stations=MSY", "&stations=SHV", "&stations=LFT", "&stations=BTR", "&stations=MLU"],
781
+ "MA_ASOS": ["&stations=BOS", "&stations=ORH", "&stations=HYA", "&stations=ACK", "&stations=BED"],
782
+ "MD_ASOS": ["&stations=BWI", "&stations=MTN", "&stations=SBY", "&stations=HGR", "&stations=ADW"],
783
+ "ME_ASOS": ["&stations=PWM", "&stations=BGR", "&stations=CAR", "&stations=PQI", "&stations=RKD"],
784
+ "MI_ASOS": ["&stations=DTW", "&stations=GRR", "&stations=FNT", "&stations=LAN", "&stations=MKG"],
785
+ "MN_ASOS": ["&stations=MSP", "&stations=DLH", "&stations=RST", "&stations=STC", "&stations=INL"],
786
+ "MO_ASOS": ["&stations=STL", "&stations=MCI", "&stations=SGF", "&stations=COU", "&stations=JLN"],
787
+ "MS_ASOS": ["&stations=JAN", "&stations=GPT", "&stations=MEI", "&stations=PIB", "&stations=GLH"],
788
+ "MT_ASOS": ["&stations=BIL", "&stations=MSO", "&stations=GTF", "&stations=HLN", "&stations=BZN"],
789
+ "NC_ASOS": ["&stations=CLT", "&stations=RDU", "&stations=GSO", "&stations=ILM", "&stations=AVL"],
790
+ "ND_ASOS": ["&stations=BIS", "&stations=FAR", "&stations=GFK", "&stations=ISN", "&stations=JMS"],
791
+ "NE_ASOS": ["&stations=OMA"],
792
+ "NH_ASOS": ["&stations=MHT", "&stations=PSM", "&stations=CON", "&stations=LEB", "&stations=ASH"],
793
+ "NJ_ASOS": ["&stations=EWR", "&stations=ACY", "&stations=TTN", "&stations=MMU", "&stations=TEB"],
794
+ "NM_ASOS": ["&stations=ABQ", "&stations=SAF", "&stations=ROW", "&stations=HOB", "&stations=FMN"],
795
+ "NV_ASOS": ["&stations=LAS"],
796
+ "NY_ASOS": ["&stations=JFK", "&stations=LGA", "&stations=BUF", "&stations=ALB", "&stations=SYR"],
797
+ "OH_ASOS": ["&stations=CMH"],
798
+ "OK_ASOS": ["&stations=OKC", "&stations=TUL", "&stations=LAW", "&stations=SWO", "&stations=PNC"],
799
+ "OR_ASOS": ["&stations=PDX"],
800
+ "PA_ASOS": ["&stations=PHL", "&stations=PIT", "&stations=ERI", "&stations=MDT", "&stations=AVP"],
801
+ "RI_ASOS": ["&stations=PVD", "&stations=WST", "&stations=UUU"],
802
+ "SC_ASOS": ["&stations=CHS", "&stations=CAE", "&stations=GSP", "&stations=MYR", "&stations=FLO"],
803
+ "SD_ASOS": ["&stations=FSD", "&stations=RAP", "&stations=PIR", "&stations=ABR", "&stations=YKN"],
804
+ "TN_ASOS": ["&stations=BNA", "&stations=MEM", "&stations=TYS", "&stations=CHA", "&stations=TRI"],
805
+ "TX_ASOS": ["&stations=DFW", "&stations=IAH", "&stations=AUS", "&stations=SAT", "&stations=ELP"],
806
+ "UT_ASOS": ["&stations=SLC", "&stations=OGD", "&stations=PVU", "&stations=SGU", "&stations=CNY"],
807
+ "VA_ASOS": ["&stations=DCA", "&stations=RIC", "&stations=ROA", "&stations=ORF", "&stations=SHD"],
808
+ "VT_ASOS": ["&stations=BTV", "&stations=MPV", "&stations=RUT", "&stations=VSF", "&stations=MVL"],
809
+ "WA_ASOS": ["&stations=SEA", "&stations=GEG", "&stations=TIW", "&stations=VUO", "&stations=BFI"],
810
+ "WI_ASOS": ["&stations=MKE", "&stations=MSN", "&stations=GRB", "&stations=EAU", "&stations=LSE"],
811
+ "WV_ASOS": ["&stations=CRW", "&stations=CKB", "&stations=HTS", "&stations=MGW", "&stations=BKW"],
812
+ "WY_ASOS": ["&stations=CPR", "&stations=JAC", "&stations=SHR", "&stations=COD", "&stations=RKS"],
813
+ }
814
+ # --- Date setup --- #
815
+ date_object = datetime.strptime(start_date, "%Y-%m-%d")
816
+ start_day = date_object.day
817
+ start_month = date_object.month
818
+ start_year = date_object.year
819
+ formatted_date = f"{start_year:04d}-01-01" # "2000-01-01"
670
820
  today = datetime.now()
671
- end_day = today.day
672
- end_month = today.month
673
- end_year = today.year
674
-
675
- if country == "GB__ASOS":
676
- stations = ["&stations=EGCC", "&stations=EGNM", "&stations=EGBB",
677
- "&stations=EGSH", "&stations=EGFF", "&stations=EGHI",
678
- "&stations=EGLC", "&stations=EGHQ", "&stations=EGAC",
679
- "&stations=EGPF", "&stations=EGGD", "&stations=EGPE",
680
- "&stations=EGNT"]
681
- elif country == "AU__ASOS":
682
- stations = ["&stations=YPDN", "&stations=YBCS", "&stations=YBBN",
683
- "&stations=YSSY", "&stations=YSSY", "&stations=YMEN",
684
- "&stations=YPAD", "&stations=YPPH"]
685
- elif country == "USCRN":
686
- stations = ["&stations=64756", "&stations=64758", "&stations=03761", "&stations=54797", # North
687
- "&stations=53968", "&stations=53960", "&stations=54932", "&stations=13301", # Midwest
688
- "&stations=64756", "&stations=64756", "&stations=92821", "&stations=63862", # South
689
- "&stations=53152", "&stations=93245", "&stations=04138", "&stations=04237"] # West
690
- elif country == "DE__ASOS":
691
- stations = ["&stations=EDDL", "&stations=EDDH", "&stations=EDDB",
692
- "&stations=EDDN", "&stations=EDDF", "&stations=EDDK",
693
- "&stations=EDLW", "&stations=EDDM"]
694
- elif country == "FR__ASOS":
695
- stations = ["&stations=LFPB"]
696
- elif country == "Canada":
697
- institute_vector = ["CA_NB_ASOS", "CA_NF_ASOS", "CA_NT_ASOS", "CA_NS_ASOS",
698
- "CA_NU_ASOS"]
699
- stations_list = [[] for _ in range(5)]
700
- stations_list[0].append(["&stations=CYQM", "&stations=CERM", "&stations=CZCR",
701
- "&stations=CZBF", "&stations=CYFC", "&stations=CYCX"])
702
-
703
- stations_list[1].append(["&stations=CWZZ", "&stations=CYDP", "&stations=CYMH",
704
- "&stations=CYAY", "&stations=CWDO", "&stations=CXTP",
705
- "&stations=CYJT", "&stations=CYYR", "&stations=CZUM",
706
- "&stations=CYWK", "&stations=CYWK"])
707
-
708
- stations_list[2].append(["&stations=CYHI", "&stations=CZCP", "&stations=CWLI",
709
- "&stations=CWND", "&stations=CXTV", "&stations=CYVL",
710
- "&stations=CYCO", "&stations=CXDE", "&stations=CYWE",
711
- "&stations=CYLK", "&stations=CWID", "&stations=CYRF",
712
- "&stations=CXYH", "&stations=CYWY", "&stations=CWMT"])
713
-
714
- stations_list[3].append(["&stations=CWEF", "&stations=CXIB", "&stations=CYQY",
715
- "&stations=CYPD", "&stations=CXNP", "&stations=CXMY",
716
- "&stations=CYAW", "&stations=CWKG", "&stations=CWVU",
717
- "&stations=CXLB", "&stations=CWSA", "&stations=CWRN"])
718
-
719
- stations_list[4].append(["&stations=CYLT", "&stations=CWEU", "&stations=CWGZ",
720
- "&stations=CYIO", "&stations=CXSE", "&stations=CYCB",
721
- "&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
722
- "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])
723
-
724
- elif country == "ZA__ASOS":
725
- cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
726
- stations = []
821
+ end_day, end_month, end_year = today.day, today.month, today.year
727
822
 
728
- for city in cities:
729
- geolocator = Nominatim(user_agent="MyApp")
730
- location = geolocator.geocode(city)
731
- stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
823
+ # ------------------------------------------------------------------ #
824
+ # Utility functions
825
+ # ------------------------------------------------------------------ #
826
+ def convert_f_to_c(series_f: pd.Series) -> pd.Series:
827
+ """Convert Fahrenheit to Celsius."""
828
+ return (series_f - 32) * 5.0 / 9.0
829
+
830
+ def fetch_mesonet_data(network: str, stations: list) -> pd.DataFrame:
831
+ """Fetch station-based data (daily) from Iowa Mesonet."""
832
+ import csv
732
833
 
733
- # Temperature
734
- if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
735
- # We start by making a data frame of the following weather stations
736
834
  station_query = ''.join(stations)
835
+ url = (
836
+ "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?"
837
+ f"network={network}{station_query}"
838
+ f"&year1={start_year}&month1={start_month}&day1={start_day}"
839
+ f"&year2={end_year}&month2={end_month}&day2={end_day}"
840
+ )
841
+ with urllib.request.urlopen(url) as f:
842
+ df = pd.read_csv(f, dtype=str, quoting=csv.QUOTE_ALL)
843
+ return df
844
+
845
+ def fetch_canada_data() -> pd.DataFrame:
846
+ """Canada uses multiple sub-networks. Combine them all."""
847
+ import csv
848
+ final_df = pd.DataFrame()
849
+ for i, institute_temp in enumerate(institute_vector):
850
+ station_query_temp = ''.join(stations_list_canada[i])
851
+ mesonet_url = (
852
+ "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?"
853
+ f"network={institute_temp}{station_query_temp}"
854
+ f"&year1={start_year}&month1={start_month}&day1={start_day}"
855
+ f"&year2={end_year}&month2={end_month}&day2={end_day}"
856
+ )
857
+ with urllib.request.urlopen(mesonet_url) as f:
858
+ temp_df = pd.read_csv(f, dtype=str, quoting=csv.QUOTE_ALL)
737
859
 
738
- raw_weather_list = ''.join(["https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=", country,
739
- station_query,
740
- "&year1=", str(start_year), "&month1=", str(start_month), "&day1=", str(start_day),
741
- "&year2=", str(end_year), "&month2=", str(end_month), "&day2=", str(end_day)])
742
- raw_weather = urllib.request.urlopen(raw_weather_list)
743
- raw_weather = pd.read_csv(raw_weather)
744
-
745
- # Replace the occurrences of "None" with Missing Value
746
- raw_weather["max_temp_f"].replace("None", 0, inplace=True)
747
- raw_weather["min_temp_f"].replace("None", 0, inplace=True)
748
-
749
- # Remove any data that isn't temperature-related
750
- weather = raw_weather.iloc[:, 0:4]
751
-
752
- weather[["max_temp_f", "min_temp_f"]] = weather[["max_temp_f", "min_temp_f"]].apply(pd.to_numeric)
753
-
754
- # Estimate mean temperature
755
- weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
756
-
757
- # Convert Fahrenheit to Celsius for max_temp_f
758
- weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
759
-
760
- # Convert Fahrenheit to Celsius for min_temp_f
761
- weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
762
-
763
- # Convert Fahrenheit to Celsius for mean_temp_f
764
- weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
765
-
766
- # Aggregate the data to week commencing sunday taking the average of the data
767
- # Convert the date column to a Date type
768
- weather["day"] = pd.to_datetime(weather["day"], format="%Y-%m-%d")
769
-
770
- # Determine the starting chosen day for each date
771
- weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
772
-
773
- # Group by week_starting and summarize
774
- numeric_columns = weather.select_dtypes(include='number').columns
775
- weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
776
- weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
777
- "min_temp_f": "avg_min_temp_f",
778
- "mean_temp_f": "avg_mean_temp_f",
779
- "max_temp_c": "avg_max_temp_c",
780
- "min_temp_c": "avg_min_temp_c",
781
- "mean_temp_c": "avg_mean_temp_c"}, inplace=True)
782
- elif country == "Canada":
783
- for i in range(len(institute_vector)):
784
- station_query_temp = ''.join(flatten_list(stations_list[i]))
785
- institute_temp = institute_vector[i]
786
- raw_weather_temp = ''.join(["https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=", institute_temp,
787
- station_query_temp,
788
- "&year1=", str(start_year), "&month1=", str(start_month), "&day1=", str(start_day),
789
- "&year2=", str(end_year), "&month2=", str(end_month), "&day2=", str(end_day)])
790
- raw_weather_temp = urllib.request.urlopen(raw_weather_temp)
791
- raw_weather_temp = pd.read_csv(raw_weather_temp)
792
-
793
- if len(raw_weather_temp.index) == 0:
794
- continue
795
- raw_weather_temp = raw_weather_temp[['station', 'day', 'max_temp_f', 'min_temp_f', 'precip_in']]
860
+ if not temp_df.empty:
861
+ final_df = pd.concat([final_df, temp_df], ignore_index=True)
862
+ return final_df
796
863
 
797
- if i == 1:
798
- raw_weather = raw_weather_temp
799
- else:
800
- raw_weather = pd.concat([raw_weather, raw_weather_temp])
801
-
802
- # Drop error column if it exists
803
- if 'ERROR: Invalid network specified' in list(raw_weather.columns):
804
- raw_weather.drop('ERROR: Invalid network specified', axis=1, inplace=True)
805
-
806
- # Replace none values
807
- raw_weather["max_temp_f"].replace("None", 0, inplace=True)
808
- raw_weather["min_temp_f"].replace("None", 0, inplace=True)
809
- raw_weather["precip_in"].replace("None", 0, inplace=True)
810
-
811
- weather = raw_weather
812
- weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
813
-
814
- # Estimate mean temperature
815
- weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
816
-
817
- # Convert Fahrenheit to Celsius for max_temp_f
818
- weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
819
-
820
- # Convert Fahrenheit to Celsius for min_temp_f
821
- weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
822
-
823
- # Convert Fahrenheit to Celsius for mean_temp_f
824
- weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
825
-
826
- # Aggregate the data to week commencing sunday taking the average of the data
827
- # Convert the date column to a Date type
828
- weather["day"] = pd.to_datetime(weather["day"], format="%Y-%m-%d")
829
-
830
- # Determine the starting chosen day for each date
831
- weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
832
-
833
- # Group by week_starting and summarize
834
- numeric_columns = weather.select_dtypes(include='number').columns
835
- weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
836
- weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
837
- "min_temp_f": "avg_min_temp_f",
838
- "mean_temp_f": "avg_mean_temp_f",
839
- "max_temp_c": "avg_max_temp_c",
840
- "min_temp_c": "avg_min_temp_c",
841
- "mean_temp_c": "avg_mean_temp_c",
842
- "precip_in": "avg_mean_perc"}, inplace=True)
843
- elif country == "ZA__ASOS":
864
+ def fetch_openmeteo_rain_snow(cities: list) -> pd.DataFrame:
865
+ """
866
+ Fetch daily precipitation_sum (rain) and snowfall_sum (snow) from Open-Meteo.
867
+ Returns columns: ["date", "rain_sum", "snow_sum", "city"] for each day.
868
+ We'll then do a weekly aggregator that yields avg_rain_sum, avg_snow_sum.
869
+ """
844
870
  weather_data_list = []
871
+ geolocator = Nominatim(user_agent="MyApp")
845
872
 
846
873
  for city in cities:
847
- geolocator = Nominatim(user_agent="MyApp")
848
- location = geolocator.geocode(city)
849
- url = "https://archive-api.open-meteo.com/v1/archive"
874
+ loc = geolocator.geocode(city)
875
+ if not loc:
876
+ print(f"Could not find location for {city}, skipping.")
877
+ continue
850
878
 
879
+ url = "https://archive-api.open-meteo.com/v1/archive"
851
880
  params = {
852
- "latitude": location.latitude,
853
- "longitude": location.longitude,
881
+ "latitude": loc.latitude,
882
+ "longitude": loc.longitude,
854
883
  "start_date": formatted_date,
855
884
  "end_date": today.strftime("%Y-%m-%d"),
856
- "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
885
+ "daily": "precipitation_sum,snowfall_sum",
857
886
  "timezone": "auto"
858
887
  }
888
+ resp = requests.get(url, params=params)
889
+ if resp.status_code != 200:
890
+ print(f"[ERROR] open-meteo returned status {resp.status_code} for city={city}")
891
+ continue
892
+ try:
893
+ data_json = resp.json()
894
+ except ValueError:
895
+ print(f"[ERROR] invalid JSON from open-meteo for city={city}")
896
+ continue
859
897
 
860
- response = requests.get(url, params=params)
861
- response_data = response.json()
862
-
863
- daily_data = response_data["daily"]
864
- dates = daily_data["time"]
898
+ daily_block = data_json.get("daily", {})
899
+ if not {"time", "precipitation_sum", "snowfall_sum"}.issubset(daily_block.keys()):
900
+ print(f"[ERROR] missing required keys in open-meteo for city={city}")
901
+ continue
865
902
 
866
- data = pd.DataFrame({
867
- "day": dates,
868
- "max_temp_f": daily_data["temperature_2m_max"],
869
- "min_temp_f": daily_data["temperature_2m_min"],
870
- "precip_in": daily_data["precipitation_sum"]
903
+ df_temp = pd.DataFrame({
904
+ "date": daily_block["time"],
905
+ "rain_sum": daily_block["precipitation_sum"],
906
+ "snow_sum": daily_block["snowfall_sum"]
871
907
  })
872
- data["city"] = city
873
- weather_data_list.append(data)
874
-
875
- weather = pd.concat(weather_data_list)
876
-
877
- # Convert the date column to a Date type
878
- weather["day"] = pd.to_datetime(weather["day"])
879
-
880
- # Replace None values
881
- weather["max_temp_f"].replace("None", 0, inplace=True)
882
- weather["min_temp_f"].replace("None", 0, inplace=True)
883
- weather["precip_in"].replace("None", 0, inplace=True)
884
-
885
- weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
886
-
887
- # Estimate mean temperature
888
- weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
889
-
890
- # Convert Fahrenheit to Celsius for max_temp_f
891
- weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
892
-
893
- # Convert Fahrenheit to Celsius for min_temp_f
894
- weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
895
-
896
- # Convert Fahrenheit to Celsius for mean_temp_f
897
- weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
898
-
899
- # Determine the starting chosen day for each date
900
- weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
901
-
902
- # Group by week_starting and summarize
903
- numeric_columns = weather.select_dtypes(include='number').columns
904
- weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
905
- weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
906
- "min_temp_f": "avg_min_temp_f",
907
- "mean_temp_f": "avg_mean_temp_f",
908
- "max_temp_c": "avg_max_temp_c",
909
- "min_temp_c": "avg_min_temp_c",
910
- "mean_temp_c": "avg_mean_temp_c",
911
- "precip_in": "avg_mean_perc"}, inplace=True)
912
-
913
- else:
914
- # We start by making a data frame of the following weather stations
915
- station_query = ''.join(stations)
916
-
917
- raw_weather_list = ''.join(["https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=", country,
918
- station_query,
919
- "&year1=", str(start_year), "&month1=", str(start_month), "&day1=", str(start_day),
920
- "&year2=", str(end_year), "&month2=", str(end_month), "&day2=", str(end_day)])
921
- raw_weather = urllib.request.urlopen(raw_weather_list)
922
- raw_weather = pd.read_csv(raw_weather)
923
-
924
- raw_weather = raw_weather[['day', 'max_temp_f', 'min_temp_f', 'precip_in']]
925
-
926
- # Replace the occurrences of "None" with Missing Value
927
- raw_weather["max_temp_f"].replace("None", 0, inplace=True)
928
- raw_weather["min_temp_f"].replace("None", 0, inplace=True)
929
- raw_weather["precip_in"].replace("None", 0, inplace=True)
930
-
931
- # Remove any data that isn't temperature-related
932
- weather = raw_weather
933
-
934
- weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
935
-
936
- # Estimate mean temperature
937
- weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
938
-
939
- # Convert Fahrenheit to Celsius for max_temp_f
940
- weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
941
-
942
- # Convert Fahrenheit to Celsius for min_temp_f
943
- weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
944
-
945
- # Convert Fahrenheit to Celsius for mean_temp_f
946
- weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
947
-
948
- # Aggregate the data to week commencing sunday taking the average of the data
949
- # Convert the date column to a Date type
950
- weather["day"] = pd.to_datetime(weather["day"], format="%Y-%m-%d")
951
-
952
- # Determine the starting chosen day for each date
953
- weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
954
-
955
- # Group by week_starting and summarize
956
- numeric_columns = weather.select_dtypes(include='number').columns
957
- weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
958
- weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
959
- "min_temp_f": "avg_min_temp_f",
960
- "mean_temp_f": "avg_mean_temp_f",
961
- "max_temp_c": "avg_max_temp_c",
962
- "min_temp_c": "avg_min_temp_c",
963
- "mean_temp_c": "avg_mean_temp_c",
964
- "precip_in": "avg_mean_perc"}, inplace=True)
965
-
966
- # Rainfall
967
- if country == "GB__ASOS":
968
- # Define cities and date range
969
- cities = ["Manchester", "Leeds", "Birmingham", "Norwich", "Cardiff", "Southampton", "London", "Newquay", "Belfast", "Glasgow", "Bristol", "Newcastle"]
970
-
971
- start_date = formatted_date
972
- end_date = today.strftime("%Y-%m-%d")
973
-
974
- # Initialize an empty list to store the weather data for each city
975
- weather_data_list = []
976
-
977
- # Loop through each city and fetch weather data
978
- for city in cities:
979
- # Initialize Nominatim API
980
- geolocator = Nominatim(user_agent="MyApp")
981
- location = geolocator.geocode(city)
982
- url = "https://archive-api.open-meteo.com/v1/archive"
983
-
984
- params = {
985
- "latitude": location.latitude,
986
- "longitude": location.longitude,
987
- "start_date": start_date,
988
- "end_date": end_date,
989
- "daily": "precipitation_sum",
990
- "timezone": "auto"
991
- }
992
-
993
- response = requests.get(url, params=params)
994
- response_data = response.json()
995
-
996
- daily_data = response_data["daily"]["precipitation_sum"]
997
- dates = response_data["daily"]["time"]
998
-
999
- data = pd.DataFrame({"date": dates, "rainfall": daily_data})
1000
- data["city"] = city
1001
-
1002
- weather_data_list.append(data)
1003
-
1004
- # Combine all city data into a single data frame
1005
- all_weather_data = pd.concat(weather_data_list)
1006
-
1007
- # Convert the date column to a Date type
1008
- all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
1009
-
1010
- # Set week commencing col up
1011
- all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1012
-
1013
- # Group by week_starting and summarize
1014
- numeric_columns = all_weather_data.select_dtypes(include='number').columns
1015
- weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
1016
- weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
1017
-
1018
- # Change index to datetime
1019
- weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
1020
-
1021
- elif country == "AU__ASOS":
1022
-
1023
- # Define cities and date range
1024
- cities = ["Darwin", "Cairns", "Brisbane", "Sydney", "Melbourne", "Adelaide", "Perth"]
1025
-
1026
- start_date = formatted_date
1027
- end_date = today.strftime("%Y-%m-%d")
1028
-
1029
- # Initialize an empty list to store the weather data for each city
1030
- weather_data_list = []
1031
-
1032
- # Loop through each city and fetch weather data
1033
- for city in cities:
1034
- # Initialize Nominatim API
1035
- geolocator = Nominatim(user_agent="MyApp")
1036
- location = geolocator.geocode(city)
1037
- url = "https://archive-api.open-meteo.com/v1/archive"
1038
-
1039
- params = {
1040
- "latitude": location.latitude,
1041
- "longitude": location.longitude,
1042
- "start_date": start_date,
1043
- "end_date": end_date,
1044
- "daily": "precipitation_sum",
1045
- "timezone": "auto"
1046
- }
1047
-
1048
- response = requests.get(url, params=params)
1049
- response_data = response.json()
1050
-
1051
- daily_data = response_data["daily"]["precipitation_sum"]
1052
- dates = response_data["daily"]["time"]
1053
-
1054
- data = pd.DataFrame({"date": dates, "rainfall": daily_data})
1055
- data["city"] = city
1056
-
1057
- weather_data_list.append(data)
1058
-
1059
- # Combine all city data into a single data frame
1060
- all_weather_data = pd.concat(weather_data_list)
1061
-
1062
- # Convert the date column to a Date type
1063
- all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
908
+ df_temp["city"] = city
909
+ weather_data_list.append(df_temp)
1064
910
 
1065
- # Set week commencing col up
1066
- all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1067
-
1068
- # Group by week_starting and summarize
1069
- numeric_columns = all_weather_data.select_dtypes(include='number').columns
1070
- weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
1071
- weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
1072
-
1073
- # Change index to datetime
1074
- weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
1075
-
1076
- elif country == "DE__ASOS":
1077
-
1078
- # Define cities and date range
1079
- cities = ["Dortmund", "Düsseldorf", "Frankfurt", "Munich", "Cologne", "Berlin", "Hamburg", "Nuernberg"]
1080
-
1081
- start_date = formatted_date
1082
- end_date = today.strftime("%Y-%m-%d")
1083
-
1084
- # Initialize an empty list to store the weather data for each city
1085
- weather_data_list = []
1086
-
1087
- # Loop through each city and fetch weather data
1088
- for city in cities:
1089
- # Initialize Nominatim API
1090
- geolocator = Nominatim(user_agent="MyApp")
1091
- location = geolocator.geocode(city)
1092
- url = "https://archive-api.open-meteo.com/v1/archive"
1093
-
1094
- params = {
1095
- "latitude": location.latitude,
1096
- "longitude": location.longitude,
1097
- "start_date": start_date,
1098
- "end_date": end_date,
1099
- "daily": "precipitation_sum",
1100
- "timezone": "auto"
1101
- }
1102
-
1103
- response = requests.get(url, params=params)
1104
- response_data = response.json()
1105
-
1106
- daily_data = response_data["daily"]["precipitation_sum"]
1107
- dates = response_data["daily"]["time"]
1108
-
1109
- data = pd.DataFrame({"date": dates, "rainfall": daily_data})
1110
- data["city"] = city
1111
-
1112
- weather_data_list.append(data)
1113
-
1114
- # Combine all city data into a single data frame
1115
- all_weather_data = pd.concat(weather_data_list)
1116
-
1117
- # Convert the date column to a Date type
1118
- all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
1119
-
1120
- # Set week commencing col up
1121
- all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1122
-
1123
- # Group by week_starting and summarize
1124
- numeric_columns = all_weather_data.select_dtypes(include='number').columns
1125
- weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
1126
- weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
1127
-
1128
- # Change index to datetime
1129
- weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
1130
-
1131
- elif country == "FR__ASOS":
1132
-
1133
- # Define cities and date range
1134
- cities = ["Paris"]
1135
-
1136
- start_date = formatted_date
1137
- end_date = today.strftime("%Y-%m-%d")
1138
-
1139
- # Initialize an empty list to store the weather data for each city
1140
- weather_data_list = []
1141
-
1142
- # Loop through each city and fetch weather data
1143
- for city in cities:
1144
- # Initialize Nominatim API
1145
- geolocator = Nominatim(user_agent="MyApp")
1146
- location = geolocator.geocode(city)
1147
- url = "https://archive-api.open-meteo.com/v1/archive"
1148
-
1149
- params = {
1150
- "latitude": location.latitude,
1151
- "longitude": location.longitude,
1152
- "start_date": start_date,
1153
- "end_date": end_date,
1154
- "daily": "precipitation_sum",
1155
- "timezone": "auto"
1156
- }
1157
-
1158
- response = requests.get(url, params=params)
1159
- response_data = response.json()
1160
-
1161
- daily_data = response_data["daily"]["precipitation_sum"]
1162
- dates = response_data["daily"]["time"]
1163
-
1164
- data = pd.DataFrame({"date": dates, "rainfall": daily_data})
1165
- data["city"] = city
1166
-
1167
- weather_data_list.append(data)
1168
-
1169
- # Combine all city data into a single data frame
1170
- all_weather_data = pd.concat(weather_data_list)
1171
-
1172
- # Convert the date column to a Date type
1173
- all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
1174
-
1175
- # Set week commencing col up
1176
- all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1177
-
1178
- # Group by week_starting and summarize
1179
- numeric_columns = all_weather_data.select_dtypes(include='number').columns
1180
- weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
1181
- weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
1182
-
1183
- # Change index to datetime
1184
- weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
1185
-
1186
- elif country == "ZA__ASOS":
1187
- cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
1188
- start_date = formatted_date
1189
- end_date = today.strftime("%Y-%m-%d")
1190
-
1191
- weather_data_list = []
1192
-
1193
- for city in cities:
1194
- geolocator = Nominatim(user_agent="MyApp")
1195
- location = geolocator.geocode(city)
1196
- url = "https://archive-api.open-meteo.com/v1/archive"
911
+ if weather_data_list:
912
+ return pd.concat(weather_data_list, ignore_index=True)
913
+ else:
914
+ return pd.DataFrame()
1197
915
 
1198
- params = {
1199
- "latitude": location.latitude,
1200
- "longitude": location.longitude,
1201
- "start_date": start_date,
1202
- "end_date": end_date,
1203
- "daily": "precipitation_sum",
1204
- "timezone": "auto"
1205
- }
916
+ def weekly_aggregate_temp_mesonet(df: pd.DataFrame) -> pd.DataFrame:
917
+ """
918
+ For NON-US mesonet data, we only keep max_temp_f, min_temp_f,
919
+ then compute mean_temp_f, plus Celsius, and do weekly average.
920
+ """
921
+ import pandas as pd
922
+
923
+ # Convert day col
924
+ if "day" not in df.columns:
925
+ return pd.DataFrame()
926
+
927
+ # Only keep relevant columns
928
+ keep_cols = []
929
+ for c in ["day", "max_temp_f", "min_temp_f"]:
930
+ if c in df.columns:
931
+ keep_cols.append(c)
932
+ df = df[keep_cols].copy()
933
+
934
+ # Convert "None" => numeric
935
+ for c in ["max_temp_f", "min_temp_f"]:
936
+ if c in df.columns:
937
+ df[c] = df[c].replace("None", pd.NA)
938
+ df[c] = pd.to_numeric(df[c], errors="coerce")
939
+
940
+ df["day"] = pd.to_datetime(df["day"], errors="coerce")
941
+ df["mean_temp_f"] = (df["max_temp_f"] + df["min_temp_f"]) / 2
942
+ df["max_temp_c"] = convert_f_to_c(df["max_temp_f"])
943
+ df["min_temp_c"] = convert_f_to_c(df["min_temp_f"])
944
+ df["mean_temp_c"] = convert_f_to_c(df["mean_temp_f"])
945
+
946
+ # Group by "week_starting"
947
+ df["week_starting"] = df["day"].apply(
948
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
949
+ if pd.notnull(x) else pd.NaT
950
+ )
951
+ numeric_cols = df.select_dtypes(include='number').columns
952
+ weekly = df.groupby("week_starting")[numeric_cols].mean()
953
+
954
+ # Rename columns
955
+ rename_map = {
956
+ "max_temp_f": "avg_max_temp_f",
957
+ "min_temp_f": "avg_min_temp_f",
958
+ "mean_temp_f": "avg_mean_temp_f",
959
+ "max_temp_c": "avg_max_temp_c",
960
+ "min_temp_c": "avg_min_temp_c",
961
+ "mean_temp_c": "avg_mean_temp_c",
962
+ }
963
+ weekly.rename(columns=rename_map, inplace=True)
964
+
965
+ # Return as a DataFrame w/ index = week_starting
966
+ return weekly
967
+
968
+ def weekly_aggregate_rain_snow_openmeteo(df: pd.DataFrame) -> pd.DataFrame:
969
+ """
970
+ For NON-US, from open-meteo, we have daily columns 'date','rain_sum','snow_sum'.
971
+ We'll do weekly average of each. -> 'avg_rain_sum', 'avg_snow_sum'.
972
+ """
973
+ import pandas as pd
974
+ if "date" not in df.columns:
975
+ return pd.DataFrame()
976
+
977
+ df["date"] = pd.to_datetime(df["date"], errors="coerce")
978
+ df["week_starting"] = df["date"].apply(
979
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
980
+ if pd.notnull(x) else pd.NaT
981
+ )
1206
982
 
1207
- response = requests.get(url, params=params)
1208
- response_data = response.json()
983
+ # Convert to numeric
984
+ for c in ["rain_sum", "snow_sum"]:
985
+ if c in df.columns:
986
+ df[c] = pd.to_numeric(df[c], errors="coerce")
1209
987
 
1210
- daily_data = response_data["daily"]["precipitation_sum"]
1211
- dates = response_data["daily"]["time"]
988
+ numeric_cols = df.select_dtypes(include='number').columns
989
+ weekly = df.groupby("week_starting")[numeric_cols].mean()
1212
990
 
1213
- data = pd.DataFrame({"date": dates, "rainfall": daily_data})
1214
- data["city"] = city
991
+ rename_map = {
992
+ "rain_sum": "avg_rain_sum",
993
+ "snow_sum": "avg_snow_sum"
994
+ }
995
+ weekly.rename(columns=rename_map, inplace=True)
996
+ return weekly
1215
997
 
1216
- weather_data_list.append(data)
998
+ def weekly_aggregate_us(df: pd.DataFrame) -> pd.DataFrame:
999
+ """
1000
+ For US Mesonet data (per state), we keep max_temp_f, min_temp_f, precip_in, snow_in,
1001
+ then compute mean_temp_f & convert to celsius, group weekly.
1002
+ We'll rename:
1003
+ max_temp_f -> avg_max_temp_f
1004
+ min_temp_f -> avg_min_temp_f
1005
+ mean_temp_f -> avg_mean_temp_f
1006
+ precip_in -> avg_rain_sum
1007
+ snow_in -> avg_snow_sum
1008
+ """
1009
+ import pandas as pd
1010
+ if "day" not in df.columns:
1011
+ return pd.DataFrame()
1012
+
1013
+ # Convert day
1014
+ df["day"] = pd.to_datetime(df["day"], errors="coerce")
1015
+
1016
+ # Convert "None" => numeric
1017
+ for c in ["max_temp_f", "min_temp_f", "precip_in", "snow_in"]:
1018
+ if c in df.columns:
1019
+ df[c] = df[c].replace("None", pd.NA)
1020
+ df[c] = pd.to_numeric(df[c], errors="coerce")
1021
+
1022
+ # Compute mean_temp_f, celsius
1023
+ df["mean_temp_f"] = (df["max_temp_f"] + df["min_temp_f"]) / 2
1024
+ df["max_temp_c"] = convert_f_to_c(df["max_temp_f"])
1025
+ df["min_temp_c"] = convert_f_to_c(df["min_temp_f"])
1026
+ df["mean_temp_c"] = convert_f_to_c(df["mean_temp_f"])
1027
+
1028
+ # Weekly grouping
1029
+ df["week_starting"] = df["day"].apply(
1030
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
1031
+ if pd.notnull(x) else pd.NaT
1032
+ )
1033
+ numeric_cols = df.select_dtypes(include='number').columns
1034
+ weekly = df.groupby("week_starting")[numeric_cols].mean()
1035
+
1036
+ rename_map = {
1037
+ "max_temp_f": "avg_max_temp_f",
1038
+ "min_temp_f": "avg_min_temp_f",
1039
+ "mean_temp_f": "avg_mean_temp_f",
1040
+ "max_temp_c": "avg_max_temp_c",
1041
+ "min_temp_c": "avg_min_temp_c",
1042
+ "mean_temp_c": "avg_mean_temp_c",
1043
+ "precip_in": "avg_rain_sum",
1044
+ "snow_in": "avg_snow_sum"
1045
+ }
1046
+ weekly.rename(columns=rename_map, inplace=True)
1047
+ return weekly
1048
+
1049
+ def rename_with_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
1050
+ """Rename all columns except 'week_starting' or 'OBS' with the given prefix."""
1051
+ df2 = df.copy()
1052
+ new_cols = {}
1053
+ for col in df2.columns:
1054
+ if col not in ["week_starting", "OBS"]:
1055
+ new_cols[col] = prefix + col
1056
+ df2.rename(columns=new_cols, inplace=True)
1057
+ return df2
1058
+
1059
+ # ------------------------------------------------------------------ #
1060
+ # The final combined DataFrame
1061
+ # ------------------------------------------------------------------ #
1062
+ combined_df = pd.DataFrame()
1217
1063
 
1218
- # Combine all city data into a single data frame
1219
- all_weather_data = pd.concat(weather_data_list)
1064
+ # ------------------------------------------------------------------ #
1065
+ # 1) Loop over each requested country
1066
+ # ------------------------------------------------------------------ #
1067
+ for country_code in country_codes:
1068
+ net = country_dict.get(country_code)
1069
+ if net is None:
1070
+ print(f"Warning: Invalid country_code '{country_code}' – skipping.")
1071
+ continue
1220
1072
 
1221
- # Convert the date column to a Date type
1222
- all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
1073
+ # =========================
1074
+ # 2) Special Logic for US
1075
+ # =========================
1076
+ if net == "US_STATES":
1077
+ for state_code, network_code in us_state_networks.items():
1078
+ stations = us_stations_map.get(network_code, [])
1079
+ if not stations:
1080
+ print(f"[DEBUG] No stations for {network_code}, skipping.")
1081
+ continue
1082
+
1083
+ raw_df = fetch_mesonet_data(network_code, stations)
1084
+ if raw_df.empty:
1085
+ print(f"[DEBUG] DataFrame empty for {network_code}, skipping.")
1086
+ continue
1087
+
1088
+ weekly_state = weekly_aggregate_us(raw_df)
1089
+ if weekly_state.empty:
1090
+ print(f"[DEBUG] Aggregated weekly DataFrame empty for {network_code}, skipping.")
1091
+ continue
1092
+
1093
+ weekly_state.reset_index(inplace=True)
1094
+ weekly_state.rename(columns={"week_starting": "OBS"}, inplace=True)
1095
+
1096
+ # Now rename columns with prefix: seas_us_{statecode}_
1097
+ prefix = f"seas_us_{state_code.lower()}_"
1098
+ weekly_state = rename_with_prefix(weekly_state, prefix)
1099
+
1100
+ # Merge into combined
1101
+ if combined_df.empty:
1102
+ combined_df = weekly_state
1103
+ else:
1104
+ combined_df = pd.merge(combined_df, weekly_state, on="OBS", how="outer")
1105
+
1106
+ # Done with the US. Move on to the next country in the loop
1107
+ continue
1223
1108
 
1224
- # Set week commencing col up
1225
- all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1109
+ # =======================================
1110
+ # 3) Logic for Non-US (AU, GB, DE, CA, ZA)
1111
+ # =======================================
1112
+ # A) Fetch temperature data from Mesonet
1113
+ if net == "Canada":
1114
+ raw_temp = fetch_canada_data()
1115
+ else:
1116
+ # e.g. "GB__ASOS", "AU__ASOS", "DE__ASOS", "ZA__ASOS" (if added)
1117
+ stations = station_map.get(net, [])
1118
+ if not stations and net != "ZA__ASOS":
1119
+ # If we have no stations for net and it's not ZA,
1120
+ # there's no data. (If ZA has stations, add them above.)
1121
+ raw_temp = pd.DataFrame()
1122
+ else:
1123
+ raw_temp = fetch_mesonet_data(net, stations)
1124
+
1125
+ weekly_temp = pd.DataFrame()
1126
+ if not raw_temp.empty:
1127
+ # For these countries, we only keep max_temp_f, min_temp_f, mean_temp_f
1128
+ weekly_temp = weekly_aggregate_temp_mesonet(raw_temp)
1129
+
1130
+ # B) Fetch rain+snow from Open-Meteo (only if we have an entry in rainfall_city_map)
1131
+ weekly_precip = pd.DataFrame()
1132
+ if net in rainfall_city_map:
1133
+ city_list = rainfall_city_map[net]
1134
+ df_rain_snow = fetch_openmeteo_rain_snow(city_list)
1135
+ if not df_rain_snow.empty:
1136
+ weekly_precip = weekly_aggregate_rain_snow_openmeteo(df_rain_snow)
1137
+
1138
+ # C) Merge the temperature data + precip/snow data on the weekly index
1139
+ if not weekly_temp.empty and not weekly_precip.empty:
1140
+ merged_df = pd.merge(weekly_temp, weekly_precip, left_index=True, right_index=True, how="outer")
1141
+ elif not weekly_temp.empty:
1142
+ merged_df = weekly_temp
1143
+ else:
1144
+ merged_df = weekly_precip
1226
1145
 
1227
- # Group by week_starting and summarize
1228
- numeric_columns = all_weather_data.select_dtypes(include='number').columns
1229
- weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
1230
- weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
1146
+ if merged_df.empty:
1147
+ print(f"No data retrieved for country: {country_code}")
1148
+ continue
1231
1149
 
1232
- # Change index to datetime
1233
- weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
1150
+ # D) Convert index -> a column OBS
1151
+ merged_df.reset_index(inplace=True)
1152
+ merged_df.rename(columns={"week_starting": "OBS"}, inplace=True)
1234
1153
 
1235
- # Merge the dataframes
1236
- if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
1237
- merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
1238
- else:
1239
- merged_df = weekly_avg_temp
1154
+ # E) Rename with prefix = "seas_{country_code}_"
1155
+ prefix = f"seas_{country_code.lower()}_"
1156
+ merged_df = rename_with_prefix(merged_df, prefix)
1240
1157
 
1241
- merged_df.reset_index(drop=False, inplace=True)
1242
- merged_df.rename(columns={'week_starting': 'OBS'}, inplace=True)
1158
+ # F) Merge into combined_df
1159
+ if combined_df.empty:
1160
+ combined_df = merged_df
1161
+ else:
1162
+ combined_df = pd.merge(combined_df, merged_df, on="OBS", how="outer")
1243
1163
 
1244
- final_weather = ims_proc.rename_cols(merged_df, 'seas_')
1164
+ # ------------------------------------------------------------------ #
1165
+ # 4) Sort final by OBS (optional)
1166
+ # ------------------------------------------------------------------ #
1167
+ if not combined_df.empty:
1168
+ combined_df.sort_values(by="OBS", inplace=True)
1245
1169
 
1246
- return final_weather
1247
-
1170
+ return combined_df
1171
+
1248
1172
  def pull_macro_ons_uk(self, cdid_list=None, week_start_day="mon", sector=None):
1249
1173
  """
1250
1174
  Fetches time series data for multiple CDIDs from the ONS API, converts it to daily frequency,
@@ -1481,3 +1405,228 @@ class datapull:
1481
1405
  print("No data available to process.")
1482
1406
  return pd.DataFrame()
1483
1407
 
1408
+ def pull_sports_events(self, start_date="2020-01-01", week_commencing="mon"):
1409
+ """
1410
+ Combines scraping logic for:
1411
+ - UEFA Champions League and NFL from TheSportsDB (website-scraping approach)
1412
+ - FIFA World Cup, UEFA Euro, Rugby World Cup, Six Nations (via TheSportsDB API)
1413
+
1414
+ Returns a single merged DataFrame with all event dummy variables.
1415
+ """
1416
+
1417
+ ############################################################
1418
+ # 1) SCRAPE UEFA CHAMPIONS LEAGUE & NFL (TheSportsDB website scraping)
1419
+ ############################################################
1420
+ def scrape_sports_events(start_date=start_date, week_commencing=week_commencing):
1421
+ sports = {
1422
+ "uefa_champions_league": {
1423
+ "league_id": "4480",
1424
+ "seasons_url": "https://www.thesportsdb.com/league/4480-UEFA-Champions-League?a=1#allseasons",
1425
+ "season_url_template": "https://www.thesportsdb.com/season/4480-UEFA-Champions-League/{season}&all=1&view=",
1426
+ "round_filters": ["quarter", "semi", "final"]
1427
+ },
1428
+ "nfl": {
1429
+ "league_id": "4391",
1430
+ "seasons_url": "https://www.thesportsdb.com/league/4391-NFL?a=1#allseasons",
1431
+ "season_url_template": "https://www.thesportsdb.com/season/4391-NFL/{season}&all=1&view=",
1432
+ "round_filters": ["quarter", "semi", "final"]
1433
+ }
1434
+ }
1435
+
1436
+ headers = {"User-Agent": "Mozilla/5.0"}
1437
+ start_date_dt = datetime.strptime(start_date, "%Y-%m-%d")
1438
+
1439
+ # Create a full date range DataFrame
1440
+ full_date_range = pd.date_range(start=start_date, end=pd.to_datetime("today"))
1441
+ time_series_df = pd.DataFrame({"date": full_date_range})
1442
+ time_series_df["seas_uefa_champions_league"] = 0
1443
+ time_series_df["seas_nfl"] = 0
1444
+
1445
+ for sport, details in sports.items():
1446
+ # Get available seasons
1447
+ response = requests.get(details["seasons_url"], headers=headers)
1448
+ if response.status_code != 200:
1449
+ continue # Skip this sport if the request fails
1450
+
1451
+ soup = BeautifulSoup(response.text, "html.parser")
1452
+
1453
+ # Extract season names
1454
+ seasons = []
1455
+ for link in soup.find_all("a", href=True):
1456
+ href = link["href"]
1457
+ if "season" in href and sport.replace("_", "-") in href.lower():
1458
+ season_name = href.split("/")[-1] # e.g. "2023-2024"
1459
+ try:
1460
+ season_start_year = int(season_name.split("-")[0])
1461
+ season_start_date = datetime(season_start_year, 1, 1)
1462
+ if season_start_date >= start_date_dt:
1463
+ seasons.append(season_name)
1464
+ except ValueError:
1465
+ continue
1466
+
1467
+ # Scrape matches for filtered seasons
1468
+ filtered_matches = []
1469
+ for season in seasons:
1470
+ season_url = details["season_url_template"].format(season=season)
1471
+ season_response = requests.get(season_url, headers=headers)
1472
+ if season_response.status_code != 200:
1473
+ continue
1474
+
1475
+ season_soup = BeautifulSoup(season_response.text, "html.parser")
1476
+ for row in season_soup.find_all("tr"):
1477
+ cols = row.find_all("td")
1478
+ if len(cols) >= 5:
1479
+ match_date = cols[0].text.strip()
1480
+ round_name = cols[1].text.strip().lower()
1481
+ try:
1482
+ match_date_dt = datetime.strptime(match_date, "%d %b %y")
1483
+ if (match_date_dt >= start_date_dt
1484
+ and any(r in round_name for r in details["round_filters"])):
1485
+ filtered_matches.append(match_date_dt)
1486
+ except ValueError:
1487
+ continue
1488
+
1489
+ # Convert matches into time series format
1490
+ df_sport = pd.DataFrame({"date": filtered_matches})
1491
+ if df_sport.empty:
1492
+ continue
1493
+
1494
+ col_name = "seas_nfl" if sport == "nfl" else "seas_uefa_champions_league"
1495
+ time_series_df.loc[time_series_df["date"].isin(df_sport["date"]), col_name] = 1
1496
+
1497
+ # Aggregate by week commencing
1498
+ day_offsets = {
1499
+ 'mon': 'W-MON',
1500
+ 'tues': 'W-TUE',
1501
+ 'wed': 'W-WED',
1502
+ 'thurs': 'W-THU',
1503
+ 'fri': 'W-FRI',
1504
+ 'sat': 'W-SAT',
1505
+ 'sun': 'W-SUN'
1506
+ }
1507
+ if week_commencing.lower() not in day_offsets:
1508
+ raise ValueError(f"Invalid week_commencing value: {week_commencing}. Must be one of {list(day_offsets.keys())}.")
1509
+
1510
+ time_series_df = (time_series_df
1511
+ .set_index("date")
1512
+ .resample(day_offsets[week_commencing.lower()])
1513
+ .max()
1514
+ .reset_index())
1515
+
1516
+ time_series_df.rename(columns={"date": "OBS"}, inplace=True)
1517
+ time_series_df.fillna(0, inplace=True)
1518
+
1519
+ return time_series_df
1520
+
1521
+ ############################################################
1522
+ # 2) FETCH FIFA WORLD CUP, UEFA EURO, RUGBY WORLD CUP, SIX NATIONS (TheSportsDB API)
1523
+ ############################################################
1524
+ def fetch_events(start_date=start_date, week_commencing=week_commencing):
1525
+ # Initialize date range
1526
+ start_date_obj = datetime.strptime(start_date, '%Y-%m-%d')
1527
+ end_date_obj = datetime.today()
1528
+ date_range = pd.date_range(start=start_date_obj, end=end_date_obj)
1529
+ df = pd.DataFrame({'OBS': date_range}).set_index('OBS')
1530
+
1531
+ # Define columns for sports
1532
+ event_columns = {
1533
+ 'seas_fifa_world_cup': {
1534
+ 'league_id': 4429, 'start_year': 1950, 'interval': 4
1535
+ },
1536
+ 'seas_uefa_european_championship': {
1537
+ 'league_id': 4502, 'start_year': 1960, 'interval': 4, 'extra_years': [2021]
1538
+ },
1539
+ 'seas_rugby_world_cup': {
1540
+ 'league_id': 4574, 'start_year': 1987, 'interval': 4
1541
+ },
1542
+ 'seas_six_nations': {
1543
+ 'league_id': 4714, 'start_year': 2000, 'interval': 1
1544
+ },
1545
+ }
1546
+
1547
+ # Initialize columns
1548
+ for col in event_columns.keys():
1549
+ df[col] = 0
1550
+
1551
+ def fetch_league_events(league_id, column_name, start_year, interval, extra_years=None):
1552
+ extra_years = extra_years or []
1553
+ # Fetch seasons
1554
+ seasons_url = f"https://www.thesportsdb.com/api/v1/json/3/search_all_seasons.php?id={league_id}"
1555
+ seasons_response = requests.get(seasons_url)
1556
+ if seasons_response.status_code != 200:
1557
+ return # Skip on failure
1558
+
1559
+ seasons_data = seasons_response.json().get('seasons', [])
1560
+ for season in seasons_data:
1561
+ season_name = season.get('strSeason', '')
1562
+ if not season_name.isdigit():
1563
+ continue
1564
+
1565
+ year = int(season_name)
1566
+ # Check if the year is valid for this competition
1567
+ if year in extra_years or (year >= start_year and (year - start_year) % interval == 0):
1568
+ # Fetch events
1569
+ events_url = f"https://www.thesportsdb.com/api/v1/json/3/eventsseason.php?id={league_id}&s={season_name}"
1570
+ events_response = requests.get(events_url)
1571
+ if events_response.status_code != 200:
1572
+ continue
1573
+
1574
+ events_data = events_response.json().get('events', [])
1575
+ for event in events_data:
1576
+ event_date_str = event.get('dateEvent')
1577
+ if event_date_str:
1578
+ event_date = datetime.strptime(event_date_str, '%Y-%m-%d')
1579
+ if event_date in df.index:
1580
+ df.loc[event_date, column_name] = 1
1581
+
1582
+ # Fetch events for all defined leagues
1583
+ for column_name, params in event_columns.items():
1584
+ fetch_league_events(
1585
+ league_id=params['league_id'],
1586
+ column_name=column_name,
1587
+ start_year=params['start_year'],
1588
+ interval=params['interval'],
1589
+ extra_years=params.get('extra_years', [])
1590
+ )
1591
+
1592
+ # Resample by week
1593
+ day_offsets = {
1594
+ 'mon': 'W-MON',
1595
+ 'tues': 'W-TUE',
1596
+ 'wed': 'W-WED',
1597
+ 'thurs': 'W-THU',
1598
+ 'fri': 'W-FRI',
1599
+ 'sat': 'W-SAT',
1600
+ 'sun': 'W-SUN'
1601
+ }
1602
+
1603
+ if week_commencing.lower() not in day_offsets:
1604
+ raise ValueError(
1605
+ f"Invalid week_commencing value: {week_commencing}. "
1606
+ f"Must be one of {list(day_offsets.keys())}."
1607
+ )
1608
+
1609
+ df = df.resample(day_offsets[week_commencing.lower()]).max()
1610
+ df = df.reset_index()
1611
+ return df
1612
+
1613
+ ###################################################
1614
+ # 3) CALL BOTH, THEN MERGE ON "OBS" & FILL WITH 0s
1615
+ ###################################################
1616
+ df_uefa_nfl = scrape_sports_events(start_date, week_commencing)
1617
+ df_other_events = fetch_events(start_date, week_commencing)
1618
+
1619
+ # Merge on "OBS" column (outer join to preserve all dates in range)
1620
+ final_df = pd.merge(df_uefa_nfl, df_other_events, on='OBS', how='outer')
1621
+
1622
+ # Fill any NaNs with 0 for event columns
1623
+ # (Only fill numeric columns or everything except 'OBS')
1624
+ for col in final_df.columns:
1625
+ if col != 'OBS':
1626
+ final_df[col] = final_df[col].fillna(0)
1627
+
1628
+ # Sort by date just in case
1629
+ final_df.sort_values(by='OBS', inplace=True)
1630
+ final_df.reset_index(drop=True, inplace=True)
1631
+
1632
+ return final_df
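A standalone sketch of the weekly aggregation step used in pull_sports_events (note that this method spells some week_commencing keys 'tues' and 'thurs', unlike pull_weather's 'tue' and 'thur'): daily 0/1 event dummies are resampled onto a weekly anchor and collapsed with max(), so a week is flagged 1 if any of its days had a qualifying event.

    import pandas as pd

    # Daily dummy series; Super Bowl LVIII fell on Sunday 11 February 2024.
    daily = pd.DataFrame(
        {"seas_nfl": 0},
        index=pd.date_range("2024-02-05", "2024-02-18", name="OBS"),
    )
    daily.loc["2024-02-11", "seas_nfl"] = 1

    # Same resample rule the code above uses for week_commencing="mon".
    weekly = daily.resample("W-MON").max().reset_index()
    print(weekly)  # the week containing 11 Feb carries seas_nfl == 1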