imsciences 0.6.0.4__py3-none-any.whl → 0.6.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -898,7 +898,7 @@ class dataprocessing:
898
898
  start_year = int(starting_FY[2:])
899
899
 
900
900
  def calculate_FY_vectorized(date_series):
901
- years_since_start = ((date_series - start_date).dt.days / 365.25).astype(int)
901
+ years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
902
902
  fy = 'FY' + (start_year + years_since_start).astype(str)
903
903
  if short_format == "Yes":
904
904
  fy = 'FY' + fy.str[-2:]
@@ -907,8 +907,14 @@ class dataprocessing:
907
907
  df['FY'] = calculate_FY_vectorized(df[index_col])
908
908
 
909
909
  if half_years == "Yes" or combined_FY_and_H == "Yes":
910
- end_of_h1 = start_date + pd.DateOffset(months=6) - pd.DateOffset(days=1)
911
- df['Half Years'] = np.where(df[index_col] <= end_of_h1, 'H1', 'H2')
910
+ def calculate_half_year_vectorized(date_series):
911
+ fy_years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
912
+ fy_start_dates = start_date + fy_years_since_start * pd.DateOffset(years=1)
913
+ fy_end_of_h1 = fy_start_dates + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
914
+ half_year = np.where(date_series <= fy_end_of_h1, 'H1', 'H2')
915
+ return half_year
916
+
917
+ df['Half Years'] = calculate_half_year_vectorized(df[index_col])
912
918
 
913
919
  if combined_FY_and_H == "Yes":
914
920
  df['Financial Half Years'] = df['FY'] + ' ' + df['Half Years']
@@ -2102,11 +2108,17 @@ class datapull:
2102
2108
  return df_final_combined
2103
2109
 
2104
2110
  def pull_weather(self, week_commencing, country) -> pd.DataFrame:
2111
+ import pandas as pd
2112
+ import urllib.request
2113
+ from datetime import datetime
2114
+ import requests
2115
+ from geopy.geocoders import Nominatim
2116
+
2105
2117
  # Week commencing dictionary
2106
2118
  day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
2107
2119
 
2108
2120
  # Country dictionary
2109
- country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
2121
+ country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}
2110
2122
 
2111
2123
  # Function to flatten a list of nested lists into a list
2112
2124
  def flatten_list(nested_list):
@@ -2174,8 +2186,17 @@ class datapull:
2174
2186
  "&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
2175
2187
  "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])
2176
2188
 
2189
+ elif country == "ZA__ASOS":
2190
+ cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
2191
+ stations = []
2192
+
2193
+ for city in cities:
2194
+ geolocator = Nominatim(user_agent="MyApp")
2195
+ location = geolocator.geocode(city)
2196
+ stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
2197
+
2177
2198
  # Temperature
2178
- if country == "GB__ASOS" or country == "AU__ASOS" or country == "DE__ASOS" or country == "FR__ASOS":
2199
+ if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
2179
2200
  # We start by making a data frame of the following weather stations
2180
2201
  station_query = ''.join(stations)
2181
2202
 
@@ -2284,6 +2305,76 @@ class datapull:
2284
2305
  "min_temp_c": "avg_min_temp_c",
2285
2306
  "mean_temp_c": "avg_mean_temp_c",
2286
2307
  "precip_in": "avg_mean_perc"}, inplace=True)
2308
+ elif country == "ZA__ASOS":
2309
+ weather_data_list = []
2310
+
2311
+ for city in cities:
2312
+ geolocator = Nominatim(user_agent="MyApp")
2313
+ location = geolocator.geocode(city)
2314
+ url = "https://archive-api.open-meteo.com/v1/archive"
2315
+
2316
+ params = {
2317
+ "latitude": location.latitude,
2318
+ "longitude": location.longitude,
2319
+ "start_date": formatted_date,
2320
+ "end_date": today.strftime("%Y-%m-%d"),
2321
+ "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
2322
+ "timezone": "auto"
2323
+ }
2324
+
2325
+ response = requests.get(url, params=params)
2326
+ response_data = response.json()
2327
+
2328
+ daily_data = response_data["daily"]
2329
+ dates = daily_data["time"]
2330
+
2331
+ data = pd.DataFrame({
2332
+ "day": dates,
2333
+ "max_temp_f": daily_data["temperature_2m_max"],
2334
+ "min_temp_f": daily_data["temperature_2m_min"],
2335
+ "precip_in": daily_data["precipitation_sum"]
2336
+ })
2337
+ data["city"] = city
2338
+ weather_data_list.append(data)
2339
+
2340
+ weather = pd.concat(weather_data_list)
2341
+
2342
+ # Convert the date column to a Date type
2343
+ weather["day"] = pd.to_datetime(weather["day"])
2344
+
2345
+ # Replace None values
2346
+ weather["max_temp_f"].replace("None", 0, inplace=True)
2347
+ weather["min_temp_f"].replace("None", 0, inplace=True)
2348
+ weather["precip_in"].replace("None", 0, inplace=True)
2349
+
2350
+ weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
2351
+
2352
+ # Estimate mean temperature
2353
+ weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
2354
+
2355
+ # Convert Fahrenheit to Celsius for max_temp_f
2356
+ weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
2357
+
2358
+ # Convert Fahrenheit to Celsius for min_temp_f
2359
+ weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
2360
+
2361
+ # Convert Fahrenheit to Celsius for mean_temp_f
2362
+ weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
2363
+
2364
+ # Determine the starting chosen day for each date
2365
+ weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2366
+
2367
+ # Group by week_starting and summarize
2368
+ numeric_columns = weather.select_dtypes(include='number').columns
2369
+ weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
2370
+ weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
2371
+ "min_temp_f": "avg_min_temp_f",
2372
+ "mean_temp_f": "avg_mean_temp_f",
2373
+ "max_temp_c": "avg_max_temp_c",
2374
+ "min_temp_c": "avg_min_temp_c",
2375
+ "mean_temp_c": "avg_mean_temp_c",
2376
+ "precip_in": "avg_mean_perc"}, inplace=True)
2377
+
2287
2378
  else:
2288
2379
  # We start by making a data frame of the following weather stations
2289
2380
  station_query = ''.join(stations)
@@ -2557,8 +2648,57 @@ class datapull:
2557
2648
  # Change index to datetime
2558
2649
  weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
2559
2650
 
2651
+ elif country == "ZA__ASOS":
2652
+ cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
2653
+ start_date = formatted_date
2654
+ end_date = today.strftime("%Y-%m-%d")
2655
+
2656
+ weather_data_list = []
2657
+
2658
+ for city in cities:
2659
+ geolocator = Nominatim(user_agent="MyApp")
2660
+ location = geolocator.geocode(city)
2661
+ url = "https://archive-api.open-meteo.com/v1/archive"
2662
+
2663
+ params = {
2664
+ "latitude": location.latitude,
2665
+ "longitude": location.longitude,
2666
+ "start_date": start_date,
2667
+ "end_date": end_date,
2668
+ "daily": "precipitation_sum",
2669
+ "timezone": "auto"
2670
+ }
2671
+
2672
+ response = requests.get(url, params=params)
2673
+ response_data = response.json()
2674
+
2675
+ daily_data = response_data["daily"]["precipitation_sum"]
2676
+ dates = response_data["daily"]["time"]
2677
+
2678
+ data = pd.DataFrame({"date": dates, "rainfall": daily_data})
2679
+ data["city"] = city
2680
+
2681
+ weather_data_list.append(data)
2682
+
2683
+ # Combine all city data into a single data frame
2684
+ all_weather_data = pd.concat(weather_data_list)
2685
+
2686
+ # Convert the date column to a Date type
2687
+ all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
2688
+
2689
+ # Set week commencing col up
2690
+ all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2691
+
2692
+ # Group by week_starting and summarize
2693
+ numeric_columns = all_weather_data.select_dtypes(include='number').columns
2694
+ weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
2695
+ weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
2696
+
2697
+ # Change index to datetime
2698
+ weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
2699
+
2560
2700
  # Merge the dataframes
2561
- if country == "AU__ASOS" or country == "DE__ASOS" or country == "FR__ASOS" or country == "GB__ASOS":
2701
+ if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
2562
2702
  merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
2563
2703
  else:
2564
2704
  merged_df = weekly_avg_temp
@@ -2569,3 +2709,4 @@ class datapull:
2569
2709
  final_weather = ims_proc.rename_cols(merged_df, 'seas_')
2570
2710
 
2571
2711
  return final_weather
2712
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.0.4
3
+ Version: 0.6.0.7
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
- imsciences/datafunctions.py,sha256=Fr87wDxHy7wVfBsYpj_s6r3OxxsVslnFiWRdoBxYplI,135006
5
+ imsciences/datafunctions.py,sha256=9aL25wdMwxOsE5ee173NaRBCx6zQVuXI_ZGS_Hmf29w,141390
6
6
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
7
7
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
8
8
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
9
9
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
- imsciences-0.6.0.4.dist-info/METADATA,sha256=qnM9QQ1VR2rwX0lOMEVxL99CKDZdnbutZiO4h46zSKU,854
12
- imsciences-0.6.0.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- imsciences-0.6.0.4.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
- imsciences-0.6.0.4.dist-info/RECORD,,
11
+ imsciences-0.6.0.7.dist-info/METADATA,sha256=-u5hFYdnRM5zE5Buit8tX-2zu-YbmUgoU-51jqA7wMo,854
12
+ imsciences-0.6.0.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ imsciences-0.6.0.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
+ imsciences-0.6.0.7.dist-info/RECORD,,