imsciences 0.6.0.4__tar.gz → 0.6.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/PKG-INFO +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences/datafunctions.py +147 -6
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/setup.py +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/README.md +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences/__init__.py +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/setup.cfg +0 -0
@@ -898,7 +898,7 @@ class dataprocessing:
         start_year = int(starting_FY[2:])

         def calculate_FY_vectorized(date_series):
-            years_since_start = ((date_series - start_date).dt.days /
+            years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
             fy = 'FY' + (start_year + years_since_start).astype(str)
             if short_format == "Yes":
                 fy = 'FY' + fy.str[-2:]
@@ -907,8 +907,14 @@ class dataprocessing:
         df['FY'] = calculate_FY_vectorized(df[index_col])

         if half_years == "Yes" or combined_FY_and_H == "Yes":
-
-
+            def calculate_half_year_vectorized(date_series):
+                fy_years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
+                fy_start_dates = start_date + fy_years_since_start * pd.DateOffset(years=1)
+                fy_end_of_h1 = fy_start_dates + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
+                half_year = np.where(date_series <= fy_end_of_h1, 'H1', 'H2')
+                return half_year
+
+            df['Half Years'] = calculate_half_year_vectorized(df[index_col])

         if combined_FY_and_H == "Yes":
             df['Financial Half Years'] = df['FY'] + ' ' + df['Half Years']
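Note: both helpers bucket dates into 364-day (52-week) financial years measured from `start_date`, with H1 defined as the first 26 weeks of each FY. A minimal standalone sketch of the same arithmetic (the start date, year label, and sample dates below are illustrative, not from the package):

    import pandas as pd
    import numpy as np

    # Illustrative inputs: a 52-week financial year starting 2022-04-04 ("FY22").
    start_date = pd.Timestamp("2022-04-04")
    start_year = 22
    dates = pd.Series(pd.to_datetime(["2022-04-04", "2022-12-25", "2023-04-10"]))

    # Whole 364-day periods elapsed since the first FY start.
    years_since_start = ((dates - start_date).dt.days / 364).astype(int)
    fy = "FY" + (start_year + years_since_start).astype(str)

    # H1 covers the first 26 weeks of each FY; everything later is H2.
    fy_start = years_since_start.apply(lambda n: start_date + pd.DateOffset(years=int(n)))
    end_of_h1 = fy_start + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
    half = np.where(dates <= end_of_h1, "H1", "H2")

    print(list(zip(fy, half)))  # [('FY22', 'H1'), ('FY22', 'H2'), ('FY23', 'H1')]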
@@ -2102,11 +2108,17 @@ class datapull:
         return df_final_combined

     def pull_weather(self, week_commencing, country) -> pd.DataFrame:
+        import pandas as pd
+        import urllib.request
+        from datetime import datetime
+        import requests
+        from geopy.geocoders import Nominatim
+
         # Week commencing dictionary
         day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

         # Country dictionary
-        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
+        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}

         # Function to flatten a list of nested lists into a list
         def flatten_list(nested_list):
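Note: with the new "ZAF" key, South African weather can be pulled the same way as the other markets. A hypothetical call (assuming `datapull` takes no constructor arguments, as the diff suggests; the variable names are illustrative):

    from imsciences.datafunctions import datapull

    ims_pull = datapull()
    # "mon" sets the week-commencing day; "ZAF" maps to "ZA__ASOS" internally.
    weather_df = ims_pull.pull_weather("mon", "ZAF")
    print(weather_df.head())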
@@ -2174,8 +2186,17 @@ class datapull:
                           "&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
                           "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])

+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            stations = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
+
         # Temperature
-        if country
+        if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
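Note: `Nominatim.geocode` performs a live lookup against OpenStreetMap's Nominatim service and returns `None` when a place cannot be resolved, so a more defensive variant of the new loop might look like this (a sketch, not the package's code):

    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="MyApp")  # one client can be reused across lookups
    stations = []
    for city in ["Johannesburg", "Cape Town", "Durban", "Pretoria"]:
        location = geolocator.geocode(city)
        if location is None:  # a failed lookup would otherwise raise AttributeError
            continue
        stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")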
@@ -2284,6 +2305,76 @@ class datapull:
                                             "min_temp_c": "avg_min_temp_c",
                                             "mean_temp_c": "avg_mean_temp_c",
                                             "precip_in": "avg_mean_perc"}, inplace=True)
+        elif country == "ZA__ASOS":
+            weather_data_list = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": formatted_date,
+                    "end_date": today.strftime("%Y-%m-%d"),
+                    "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
+                    "timezone": "auto"
+                }
+
+                response = requests.get(url, params=params)
+                response_data = response.json()
+
+                daily_data = response_data["daily"]
+                dates = daily_data["time"]
+
+                data = pd.DataFrame({
+                    "day": dates,
+                    "max_temp_f": daily_data["temperature_2m_max"],
+                    "min_temp_f": daily_data["temperature_2m_min"],
+                    "precip_in": daily_data["precipitation_sum"]
+                })
+                data["city"] = city
+                weather_data_list.append(data)
+
+            weather = pd.concat(weather_data_list)
+
+            # Convert the date column to a Date type
+            weather["day"] = pd.to_datetime(weather["day"])
+
+            # Replace None values
+            weather["max_temp_f"].replace("None", 0, inplace=True)
+            weather["min_temp_f"].replace("None", 0, inplace=True)
+            weather["precip_in"].replace("None", 0, inplace=True)
+
+            weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
+
+            # Estimate mean temperature
+            weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
+
+            # Convert Fahrenheit to Celsius for max_temp_f
+            weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
+
+            # Convert Fahrenheit to Celsius for min_temp_f
+            weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
+
+            # Convert Fahrenheit to Celsius for mean_temp_f
+            weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
+
+            # Determine the starting chosen day for each date
+            weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+
+            # Group by week_starting and summarize
+            numeric_columns = weather.select_dtypes(include='number').columns
+            weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
+                                            "min_temp_f": "avg_min_temp_f",
+                                            "mean_temp_f": "avg_mean_temp_f",
+                                            "max_temp_c": "avg_max_temp_c",
+                                            "min_temp_c": "avg_min_temp_c",
+                                            "mean_temp_c": "avg_mean_temp_c",
+                                            "precip_in": "avg_mean_perc"}, inplace=True)
+
         else:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
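Note: for reference, a minimal standalone request against the same Open-Meteo archive endpoint (the coordinates and date range below are illustrative). By default the API reports temperature_2m_max/temperature_2m_min in °C and precipitation_sum in mm unless temperature_unit/precipitation_unit are overridden.

    import requests

    resp = requests.get(
        "https://archive-api.open-meteo.com/v1/archive",
        params={
            "latitude": -26.2,  # roughly Johannesburg
            "longitude": 28.0,
            "start_date": "2023-01-01",  # illustrative range
            "end_date": "2023-01-07",
            "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
            "timezone": "auto",
        },
    )
    daily = resp.json()["daily"]
    print(daily["time"][0], daily["temperature_2m_max"][0])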
@@ -2557,8 +2648,57 @@ class datapull:
             # Change index to datetime
             weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)

+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            start_date = formatted_date
+            end_date = today.strftime("%Y-%m-%d")
+
+            weather_data_list = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": start_date,
+                    "end_date": end_date,
+                    "daily": "precipitation_sum",
+                    "timezone": "auto"
+                }
+
+                response = requests.get(url, params=params)
+                response_data = response.json()
+
+                daily_data = response_data["daily"]["precipitation_sum"]
+                dates = response_data["daily"]["time"]
+
+                data = pd.DataFrame({"date": dates, "rainfall": daily_data})
+                data["city"] = city
+
+                weather_data_list.append(data)
+
+            # Combine all city data into a single data frame
+            all_weather_data = pd.concat(weather_data_list)
+
+            # Convert the date column to a Date type
+            all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
+
+            # Set week commencing col up
+            all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+
+            # Group by week_starting and summarize
+            numeric_columns = all_weather_data.select_dtypes(include='number').columns
+            weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
+
+            # Change index to datetime
+            weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
+
         # Merge the dataframes
-        if country
+        if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
             merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
         else:
             merged_df = weekly_avg_temp
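Note: the `week_starting` expression snaps each date back to the most recent occurrence of the chosen week-commencing day. A worked example of the arithmetic (the date is illustrative): for a Wednesday (`weekday() == 2`) with `week_commencing = "mon"` (`day_dict["mon"] == 0`), the offset is `(2 - 0) % 7 == 2` days, so the date maps to the preceding Monday.

    import pandas as pd

    day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
    d = pd.Timestamp("2023-06-14")  # a Wednesday
    week_starting = d - pd.Timedelta(days=(d.weekday() - day_dict["mon"]) % 7)
    print(week_starting.date())  # 2023-06-12, the preceding Monday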
@@ -2569,3 +2709,4 @@ class datapull:
         final_weather = ims_proc.rename_cols(merged_df, 'seas_')

         return final_weather
+