imsciences 0.6.0.5__tar.gz → 0.6.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/PKG-INFO +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences/datafunctions.py +138 -3
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/setup.py +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/README.md +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences/__init__.py +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.7}/setup.cfg +0 -0
|
@@ -2108,11 +2108,17 @@ class datapull:
|
|
|
2108
2108
|
return df_final_combined
|
|
2109
2109
|
|
|
2110
2110
|
def pull_weather(self, week_commencing, country) -> pd.DataFrame:
|
|
2111
|
+
import pandas as pd
|
|
2112
|
+
import urllib.request
|
|
2113
|
+
from datetime import datetime
|
|
2114
|
+
import requests
|
|
2115
|
+
from geopy.geocoders import Nominatim
|
|
2116
|
+
|
|
2111
2117
|
# Week commencing dictionary
|
|
2112
2118
|
day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
2113
2119
|
|
|
2114
2120
|
# Country dictionary
|
|
2115
|
-
country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
|
|
2121
|
+
country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}
|
|
2116
2122
|
|
|
2117
2123
|
# Function to flatten a list of nested lists into a list
|
|
2118
2124
|
def flatten_list(nested_list):
|
|
@@ -2180,8 +2186,17 @@ class datapull:
|
|
|
2180
2186
|
"&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
|
|
2181
2187
|
"&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])
|
|
2182
2188
|
|
|
2189
|
+
elif country == "ZA__ASOS":
|
|
2190
|
+
cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
|
|
2191
|
+
stations = []
|
|
2192
|
+
|
|
2193
|
+
for city in cities:
|
|
2194
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2195
|
+
location = geolocator.geocode(city)
|
|
2196
|
+
stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
|
|
2197
|
+
|
|
2183
2198
|
# Temperature
|
|
2184
|
-
if country
|
|
2199
|
+
if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
|
|
2185
2200
|
# We start by making a data frame of the following weather stations
|
|
2186
2201
|
station_query = ''.join(stations)
|
|
2187
2202
|
|
|
@@ -2290,6 +2305,76 @@ class datapull:
|
|
|
2290
2305
|
"min_temp_c": "avg_min_temp_c",
|
|
2291
2306
|
"mean_temp_c": "avg_mean_temp_c",
|
|
2292
2307
|
"precip_in": "avg_mean_perc"}, inplace=True)
|
|
2308
|
+
elif country == "ZA__ASOS":
|
|
2309
|
+
weather_data_list = []
|
|
2310
|
+
|
|
2311
|
+
for city in cities:
|
|
2312
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2313
|
+
location = geolocator.geocode(city)
|
|
2314
|
+
url = "https://archive-api.open-meteo.com/v1/archive"
|
|
2315
|
+
|
|
2316
|
+
params = {
|
|
2317
|
+
"latitude": location.latitude,
|
|
2318
|
+
"longitude": location.longitude,
|
|
2319
|
+
"start_date": formatted_date,
|
|
2320
|
+
"end_date": today.strftime("%Y-%m-%d"),
|
|
2321
|
+
"daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
|
|
2322
|
+
"timezone": "auto"
|
|
2323
|
+
}
|
|
2324
|
+
|
|
2325
|
+
response = requests.get(url, params=params)
|
|
2326
|
+
response_data = response.json()
|
|
2327
|
+
|
|
2328
|
+
daily_data = response_data["daily"]
|
|
2329
|
+
dates = daily_data["time"]
|
|
2330
|
+
|
|
2331
|
+
data = pd.DataFrame({
|
|
2332
|
+
"day": dates,
|
|
2333
|
+
"max_temp_f": daily_data["temperature_2m_max"],
|
|
2334
|
+
"min_temp_f": daily_data["temperature_2m_min"],
|
|
2335
|
+
"precip_in": daily_data["precipitation_sum"]
|
|
2336
|
+
})
|
|
2337
|
+
data["city"] = city
|
|
2338
|
+
weather_data_list.append(data)
|
|
2339
|
+
|
|
2340
|
+
weather = pd.concat(weather_data_list)
|
|
2341
|
+
|
|
2342
|
+
# Convert the date column to a Date type
|
|
2343
|
+
weather["day"] = pd.to_datetime(weather["day"])
|
|
2344
|
+
|
|
2345
|
+
# Replace None values
|
|
2346
|
+
weather["max_temp_f"].replace("None", 0, inplace=True)
|
|
2347
|
+
weather["min_temp_f"].replace("None", 0, inplace=True)
|
|
2348
|
+
weather["precip_in"].replace("None", 0, inplace=True)
|
|
2349
|
+
|
|
2350
|
+
weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
|
|
2351
|
+
|
|
2352
|
+
# Estimate mean temperature
|
|
2353
|
+
weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
|
|
2354
|
+
|
|
2355
|
+
# Convert Fahrenheit to Celsius for max_temp_f
|
|
2356
|
+
weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
|
|
2357
|
+
|
|
2358
|
+
# Convert Fahrenheit to Celsius for min_temp_f
|
|
2359
|
+
weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
|
|
2360
|
+
|
|
2361
|
+
# Convert Fahrenheit to Celsius for mean_temp_f
|
|
2362
|
+
weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
|
|
2363
|
+
|
|
2364
|
+
# Determine the starting chosen day for each date
|
|
2365
|
+
weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
|
|
2366
|
+
|
|
2367
|
+
# Group by week_starting and summarize
|
|
2368
|
+
numeric_columns = weather.select_dtypes(include='number').columns
|
|
2369
|
+
weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
|
|
2370
|
+
weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
|
|
2371
|
+
"min_temp_f": "avg_min_temp_f",
|
|
2372
|
+
"mean_temp_f": "avg_mean_temp_f",
|
|
2373
|
+
"max_temp_c": "avg_max_temp_c",
|
|
2374
|
+
"min_temp_c": "avg_min_temp_c",
|
|
2375
|
+
"mean_temp_c": "avg_mean_temp_c",
|
|
2376
|
+
"precip_in": "avg_mean_perc"}, inplace=True)
|
|
2377
|
+
|
|
2293
2378
|
else:
|
|
2294
2379
|
# We start by making a data frame of the following weather stations
|
|
2295
2380
|
station_query = ''.join(stations)
|
|
@@ -2563,8 +2648,57 @@ class datapull:
|
|
|
2563
2648
|
# Change index to datetime
|
|
2564
2649
|
weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
|
|
2565
2650
|
|
|
2651
|
+
elif country == "ZA__ASOS":
|
|
2652
|
+
cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
|
|
2653
|
+
start_date = formatted_date
|
|
2654
|
+
end_date = today.strftime("%Y-%m-%d")
|
|
2655
|
+
|
|
2656
|
+
weather_data_list = []
|
|
2657
|
+
|
|
2658
|
+
for city in cities:
|
|
2659
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2660
|
+
location = geolocator.geocode(city)
|
|
2661
|
+
url = "https://archive-api.open-meteo.com/v1/archive"
|
|
2662
|
+
|
|
2663
|
+
params = {
|
|
2664
|
+
"latitude": location.latitude,
|
|
2665
|
+
"longitude": location.longitude,
|
|
2666
|
+
"start_date": start_date,
|
|
2667
|
+
"end_date": end_date,
|
|
2668
|
+
"daily": "precipitation_sum",
|
|
2669
|
+
"timezone": "auto"
|
|
2670
|
+
}
|
|
2671
|
+
|
|
2672
|
+
response = requests.get(url, params=params)
|
|
2673
|
+
response_data = response.json()
|
|
2674
|
+
|
|
2675
|
+
daily_data = response_data["daily"]["precipitation_sum"]
|
|
2676
|
+
dates = response_data["daily"]["time"]
|
|
2677
|
+
|
|
2678
|
+
data = pd.DataFrame({"date": dates, "rainfall": daily_data})
|
|
2679
|
+
data["city"] = city
|
|
2680
|
+
|
|
2681
|
+
weather_data_list.append(data)
|
|
2682
|
+
|
|
2683
|
+
# Combine all city data into a single data frame
|
|
2684
|
+
all_weather_data = pd.concat(weather_data_list)
|
|
2685
|
+
|
|
2686
|
+
# Convert the date column to a Date type
|
|
2687
|
+
all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
|
|
2688
|
+
|
|
2689
|
+
# Set week commencing col up
|
|
2690
|
+
all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
|
|
2691
|
+
|
|
2692
|
+
# Group by week_starting and summarize
|
|
2693
|
+
numeric_columns = all_weather_data.select_dtypes(include='number').columns
|
|
2694
|
+
weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
|
|
2695
|
+
weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
|
|
2696
|
+
|
|
2697
|
+
# Change index to datetime
|
|
2698
|
+
weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
|
|
2699
|
+
|
|
2566
2700
|
# Merge the dataframes
|
|
2567
|
-
if country
|
|
2701
|
+
if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
|
|
2568
2702
|
merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
|
|
2569
2703
|
else:
|
|
2570
2704
|
merged_df = weekly_avg_temp
|
|
@@ -2575,3 +2709,4 @@ class datapull:
|
|
|
2575
2709
|
final_weather = ims_proc.rename_cols(merged_df, 'seas_')
|
|
2576
2710
|
|
|
2577
2711
|
return final_weather
|
|
2712
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|