PyPI - imsciences - Versions diffs - 0.6.0.5__tar.gz → 0.6.0.8__tar.gz - Mend

@@ -9,7 +9,7 @@ import re
 import pandas as pd
 from fredapi import Fred
 import time
-from datetime import datetime
+from datetime import datetime,timedelta
 from cif import cif
 from io import StringIO
 import urllib
@@ -440,7 +440,7 @@ class dataprocessing:
         return fig
-    def week_of_year_mapping(self, df, week_col, start_day_str):
+    def week_of_year_mapping(df, week_col, start_day_str):
         # Mapping of string day names to day numbers (1 for Monday, 7 for Sunday)
         day_mapping = {
@@ -456,15 +456,15 @@ class dataprocessing:
         def week_to_startdate(week_str, start_day):
             year, week = map(int, week_str.split('-W'))
             first_day_of_year = datetime(year, 1, 1)
-            day_of_week = first_day_of_year.isocalendar()[2]
-            days_to_add = (7 - day_of_week + 1) if day_of_week > 4 else (1 - day_of_week)
-            start_of_iso_week = first_day_of_year + datetime.timedelta(days=days_to_add)
+            first_weekday_of_year = first_day_of_year.weekday()  # Monday is 0 and Sunday is 6
-            # Adjust start day
-            days_to_shift = (start_day - 1) % 7
-            start_of_week = start_of_iso_week + datetime.timedelta(days=days_to_shift)
+            # Calculate days to adjust to the desired start day of the week
+            days_to_adjust = (start_day - 1 - first_weekday_of_year) % 7
+            start_of_iso_week = first_day_of_year + timedelta(days=days_to_adjust)
-            return start_of_week + datetime.timedelta(weeks=week - 1)
+            # Calculate the start of the desired week
+            start_of_week = start_of_iso_week + timedelta(weeks=week - 1)
+            return start_of_week
         # Apply the function to each row in the specified week column
         df['OBS'] = df[week_col].apply(lambda x: week_to_startdate(x, start_day)).dt.strftime('%d/%m/%Y')
@@ -1334,7 +1334,7 @@ class datapull:
         print("   - Usage: pull_ons_data(series_list, week_commencing)")
         print("   - Example: pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')")
-        print("\n4. pull_macro")
+        print("\n4. pull_oecd")
         print("   - Description: Fetch macroeconomic data from OECD and other sources for a specified country.")
         print("   - Usage: pull_macro(country='GBR', week_commencing='mon')")
         print("   - Example: pull_macro('GBR', 'mon')")
@@ -2108,11 +2108,17 @@ class datapull:
         return df_final_combined
     def pull_weather(self, week_commencing, country) -> pd.DataFrame:
+        import pandas as pd
+        import urllib.request
+        from datetime import datetime
+        import requests
+        from geopy.geocoders import Nominatim
         # Week commencing dictionary
         day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
         # Country dictionary
-        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
+        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}
         # Function to flatten a list of nested lists into a list
         def flatten_list(nested_list):
@@ -2180,8 +2186,17 @@ class datapull:
                                     "&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
                                     "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])
+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            stations = []
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
         # Temperature
-        if country == "GB__ASOS" or country == "AU__ASOS" or country == "DE__ASOS" or country == "FR__ASOS":
+        if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
@@ -2290,6 +2305,76 @@ class datapull:
                                                 "min_temp_c": "avg_min_temp_c",
                                                 "mean_temp_c": "avg_mean_temp_c",
                                                 "precip_in": "avg_mean_perc"}, inplace=True)
+        elif country == "ZA__ASOS":
+            weather_data_list = []
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": formatted_date,
+                    "end_date": today.strftime("%Y-%m-%d"),
+                    "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
+                    "timezone": "auto"
+                }
+                response = requests.get(url, params=params)
+                response_data = response.json()
+                daily_data = response_data["daily"]
+                dates = daily_data["time"]
+                data = pd.DataFrame({
+                    "day": dates,
+                    "max_temp_f": daily_data["temperature_2m_max"],
+                    "min_temp_f": daily_data["temperature_2m_min"],
+                    "precip_in": daily_data["precipitation_sum"]
+                })
+                data["city"] = city
+                weather_data_list.append(data)
+            weather = pd.concat(weather_data_list)
+            # Convert the date column to a Date type
+            weather["day"] = pd.to_datetime(weather["day"])
+            # Replace None values
+            weather["max_temp_f"].replace("None", 0, inplace=True)
+            weather["min_temp_f"].replace("None", 0, inplace=True)
+            weather["precip_in"].replace("None", 0, inplace=True)
+            weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
+            # Estimate mean temperature
+            weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
+            # Convert Fahrenheit to Celsius for max_temp_f
+            weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
+            # Convert Fahrenheit to Celsius for min_temp_f
+            weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
+            # Convert Fahrenheit to Celsius for mean_temp_f
+            weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
+            # Determine the starting chosen day for each date
+            weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+            # Group by week_starting and summarize
+            numeric_columns = weather.select_dtypes(include='number').columns
+            weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
+                                            "min_temp_f": "avg_min_temp_f",
+                                            "mean_temp_f": "avg_mean_temp_f",
+                                            "max_temp_c": "avg_max_temp_c",
+                                            "min_temp_c": "avg_min_temp_c",
+                                            "mean_temp_c": "avg_mean_temp_c",
+                                            "precip_in": "avg_mean_perc"}, inplace=True)
         else:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
@@ -2563,8 +2648,57 @@ class datapull:
             # Change index to datetime
             weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            start_date = formatted_date
+            end_date = today.strftime("%Y-%m-%d")
+            weather_data_list = []
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": start_date,
+                    "end_date": end_date,
+                    "daily": "precipitation_sum",
+                    "timezone": "auto"
+                }
+                response = requests.get(url, params=params)
+                response_data = response.json()
+                daily_data = response_data["daily"]["precipitation_sum"]
+                dates = response_data["daily"]["time"]
+                data = pd.DataFrame({"date": dates, "rainfall": daily_data})
+                data["city"] = city
+                weather_data_list.append(data)
+            # Combine all city data into a single data frame
+            all_weather_data = pd.concat(weather_data_list)
+            # Convert the date column to a Date type
+            all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
+            # Set week commencing col up
+            all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+            # Group by week_starting and summarize
+            numeric_columns = all_weather_data.select_dtypes(include='number').columns
+            weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
+            # Change index to datetime
+            weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
         # Merge the dataframes
-        if country == "AU__ASOS" or country == "DE__ASOS" or country == "FR__ASOS" or country == "GB__ASOS":
+        if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
             merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
         else:
             merged_df = weekly_avg_temp
@@ -2574,4 +2708,4 @@ class datapull:
         final_weather = ims_proc.rename_cols(merged_df, 'seas_')
-        return final_weather
+        return final_weather

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.0.5
+Version: 0.6.0.8
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.0.5
+Version: 0.6.0.8
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

@@ -8,7 +8,7 @@ def read_md(file_name):
             return f.read()
     return ''
-VERSION = '0.6.0.5'
+VERSION = '0.6.0.8'
 DESCRIPTION = 'IMS Data Processing Package'
 LONG_DESCRIPTION = read_md('README.md')  # Reading from README.md

imsciences 0.6.0.5__tar.gz → 0.6.0.8__tar.gz

imsciences 0.6.0.5__tar.gz → 0.6.0.8__tar.gz