imsciences 0.6.0.4__tar.gz → 0.6.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/PKG-INFO +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences/datafunctions.py +147 -6
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/setup.py +1 -1
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/README.md +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences/__init__.py +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.0.4 → imsciences-0.6.0.7}/setup.cfg +0 -0
@@ -898,7 +898,7 @@ class dataprocessing:
         start_year = int(starting_FY[2:])

         def calculate_FY_vectorized(date_series):
-            years_since_start = ((date_series - start_date).dt.days /
+            years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
             fy = 'FY' + (start_year + years_since_start).astype(str)
             if short_format == "Yes":
                 fy = 'FY' + fy.str[-2:]
@@ -907,8 +907,14 @@ class dataprocessing:
         df['FY'] = calculate_FY_vectorized(df[index_col])

         if half_years == "Yes" or combined_FY_and_H == "Yes":
-
-
+            def calculate_half_year_vectorized(date_series):
+                fy_years_since_start = ((date_series - start_date).dt.days / 364).astype(int)
+                fy_start_dates = start_date + fy_years_since_start * pd.DateOffset(years=1)
+                fy_end_of_h1 = fy_start_dates + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
+                half_year = np.where(date_series <= fy_end_of_h1, 'H1', 'H2')
+                return half_year
+
+            df['Half Years'] = calculate_half_year_vectorized(df[index_col])

         if combined_FY_and_H == "Yes":
             df['Financial Half Years'] = df['FY'] + ' ' + df['Half Years']
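Note: both helpers bucket dates into 364-day (52-week) financial years measured from `start_date`, with H1 defined as the first 26 weeks of each FY. A minimal standalone sketch of the same arithmetic (the start date, year label, and sample dates below are illustrative, not from the package):

    import pandas as pd
    import numpy as np

    # Illustrative inputs: a 52-week financial year starting 2022-04-04 ("FY22").
    start_date = pd.Timestamp("2022-04-04")
    start_year = 22
    dates = pd.Series(pd.to_datetime(["2022-04-04", "2022-12-25", "2023-04-10"]))

    # Whole 364-day periods elapsed since the first FY start.
    years_since_start = ((dates - start_date).dt.days / 364).astype(int)
    fy = "FY" + (start_year + years_since_start).astype(str)

    # H1 covers the first 26 weeks of each FY; everything later is H2.
    fy_start = years_since_start.apply(lambda n: start_date + pd.DateOffset(years=int(n)))
    end_of_h1 = fy_start + pd.DateOffset(weeks=26) - pd.DateOffset(weeks=1)
    half = np.where(dates <= end_of_h1, "H1", "H2")

    print(list(zip(fy, half)))  # [('FY22', 'H1'), ('FY22', 'H2'), ('FY23', 'H1')]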
@@ -2102,11 +2108,17 @@ class datapull:
         return df_final_combined

     def pull_weather(self, week_commencing, country) -> pd.DataFrame:
+        import pandas as pd
+        import urllib.request
+        from datetime import datetime
+        import requests
+        from geopy.geocoders import Nominatim
+
         # Week commencing dictionary
         day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

         # Country dictionary
-        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
+        country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}

         # Function to flatten a list of nested lists into a list
         def flatten_list(nested_list):
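Note: with the new "ZAF" key, South African weather can be pulled the same way as the other markets. A hypothetical call (assuming `datapull` takes no constructor arguments, as the diff suggests; the variable names are illustrative):

    from imsciences.datafunctions import datapull

    ims_pull = datapull()
    # "mon" sets the week-commencing day; "ZAF" maps to "ZA__ASOS" internally.
    weather_df = ims_pull.pull_weather("mon", "ZAF")
    print(weather_df.head())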
@@ -2174,8 +2186,17 @@ class datapull:
                           "&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
                           "&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])

+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            stations = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
+
         # Temperature
-        if country
+        if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
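Note: `Nominatim.geocode` performs a live lookup against OpenStreetMap's Nominatim service and returns `None` when a place cannot be resolved, so a more defensive variant of the new loop might look like this (a sketch, not the package's code):

    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="MyApp")  # one client can be reused across lookups
    stations = []
    for city in ["Johannesburg", "Cape Town", "Durban", "Pretoria"]:
        location = geolocator.geocode(city)
        if location is None:  # a failed lookup would otherwise raise AttributeError
            continue
        stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")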
@@ -2284,6 +2305,76 @@ class datapull:
                                             "min_temp_c": "avg_min_temp_c",
                                             "mean_temp_c": "avg_mean_temp_c",
                                             "precip_in": "avg_mean_perc"}, inplace=True)
+        elif country == "ZA__ASOS":
+            weather_data_list = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": formatted_date,
+                    "end_date": today.strftime("%Y-%m-%d"),
+                    "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
+                    "timezone": "auto"
+                }
+
+                response = requests.get(url, params=params)
+                response_data = response.json()
+
+                daily_data = response_data["daily"]
+                dates = daily_data["time"]
+
+                data = pd.DataFrame({
+                    "day": dates,
+                    "max_temp_f": daily_data["temperature_2m_max"],
+                    "min_temp_f": daily_data["temperature_2m_min"],
+                    "precip_in": daily_data["precipitation_sum"]
+                })
+                data["city"] = city
+                weather_data_list.append(data)
+
+            weather = pd.concat(weather_data_list)
+
+            # Convert the date column to a Date type
+            weather["day"] = pd.to_datetime(weather["day"])
+
+            # Replace None values
+            weather["max_temp_f"].replace("None", 0, inplace=True)
+            weather["min_temp_f"].replace("None", 0, inplace=True)
+            weather["precip_in"].replace("None", 0, inplace=True)
+
+            weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
+
+            # Estimate mean temperature
+            weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
+
+            # Convert Fahrenheit to Celsius for max_temp_f
+            weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
+
+            # Convert Fahrenheit to Celsius for min_temp_f
+            weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
+
+            # Convert Fahrenheit to Celsius for mean_temp_f
+            weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
+
+            # Determine the starting chosen day for each date
+            weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+
+            # Group by week_starting and summarize
+            numeric_columns = weather.select_dtypes(include='number').columns
+            weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
+                                            "min_temp_f": "avg_min_temp_f",
+                                            "mean_temp_f": "avg_mean_temp_f",
+                                            "max_temp_c": "avg_max_temp_c",
+                                            "min_temp_c": "avg_min_temp_c",
+                                            "mean_temp_c": "avg_mean_temp_c",
+                                            "precip_in": "avg_mean_perc"}, inplace=True)
+
         else:
             # We start by making a data frame of the following weather stations
             station_query = ''.join(stations)
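Note: for reference, a minimal standalone request against the same Open-Meteo archive endpoint (the coordinates and date range below are illustrative). By default the API reports temperature_2m_max/temperature_2m_min in °C and precipitation_sum in mm unless temperature_unit/precipitation_unit are overridden.

    import requests

    resp = requests.get(
        "https://archive-api.open-meteo.com/v1/archive",
        params={
            "latitude": -26.2,  # roughly Johannesburg
            "longitude": 28.0,
            "start_date": "2023-01-01",  # illustrative range
            "end_date": "2023-01-07",
            "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
            "timezone": "auto",
        },
    )
    daily = resp.json()["daily"]
    print(daily["time"][0], daily["temperature_2m_max"][0])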
@@ -2557,8 +2648,57 @@ class datapull:
             # Change index to datetime
             weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)

+        elif country == "ZA__ASOS":
+            cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
+            start_date = formatted_date
+            end_date = today.strftime("%Y-%m-%d")
+
+            weather_data_list = []
+
+            for city in cities:
+                geolocator = Nominatim(user_agent="MyApp")
+                location = geolocator.geocode(city)
+                url = "https://archive-api.open-meteo.com/v1/archive"
+
+                params = {
+                    "latitude": location.latitude,
+                    "longitude": location.longitude,
+                    "start_date": start_date,
+                    "end_date": end_date,
+                    "daily": "precipitation_sum",
+                    "timezone": "auto"
+                }
+
+                response = requests.get(url, params=params)
+                response_data = response.json()
+
+                daily_data = response_data["daily"]["precipitation_sum"]
+                dates = response_data["daily"]["time"]
+
+                data = pd.DataFrame({"date": dates, "rainfall": daily_data})
+                data["city"] = city
+
+                weather_data_list.append(data)
+
+            # Combine all city data into a single data frame
+            all_weather_data = pd.concat(weather_data_list)
+
+            # Convert the date column to a Date type
+            all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
+
+            # Set week commencing col up
+            all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+
+            # Group by week_starting and summarize
+            numeric_columns = all_weather_data.select_dtypes(include='number').columns
+            weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
+            weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
+
+            # Change index to datetime
+            weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
+
         # Merge the dataframes
-        if country
+        if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
             merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
         else:
             merged_df = weekly_avg_temp
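Note: the `week_starting` expression snaps each date back to the most recent occurrence of the chosen week-commencing day. A worked example of the arithmetic (the date is illustrative): for a Wednesday (`weekday() == 2`) with `week_commencing = "mon"` (`day_dict["mon"] == 0`), the offset is `(2 - 0) % 7 == 2` days, so the date maps to the preceding Monday.

    import pandas as pd

    day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
    d = pd.Timestamp("2023-06-14")  # a Wednesday
    week_starting = d - pd.Timedelta(days=(d.weekday() - day_dict["mon"]) % 7)
    print(week_starting.date())  # 2023-06-12, the preceding Monday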
@@ -2569,3 +2709,4 @@ class datapull:
         final_weather = ims_proc.rename_cols(merged_df, 'seas_')

         return final_weather
+