imsciences 0.6.0.5__tar.gz → 0.6.0.8__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/PKG-INFO +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences/datafunctions.py +148 -14
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/setup.py +1 -1
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/README.md +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences/__init__.py +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.6.0.5 → imsciences-0.6.0.8}/setup.cfg +0 -0
|
@@ -9,7 +9,7 @@ import re
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from fredapi import Fred
|
|
11
11
|
import time
|
|
12
|
-
from datetime import datetime
|
|
12
|
+
from datetime import datetime,timedelta
|
|
13
13
|
from cif import cif
|
|
14
14
|
from io import StringIO
|
|
15
15
|
import urllib
|
|
@@ -440,7 +440,7 @@ class dataprocessing:
|
|
|
440
440
|
|
|
441
441
|
return fig
|
|
442
442
|
|
|
443
|
-
def week_of_year_mapping(
|
|
443
|
+
def week_of_year_mapping(df, week_col, start_day_str):
|
|
444
444
|
|
|
445
445
|
# Mapping of string day names to day numbers (1 for Monday, 7 for Sunday)
|
|
446
446
|
day_mapping = {
|
|
@@ -456,15 +456,15 @@ class dataprocessing:
|
|
|
456
456
|
def week_to_startdate(week_str, start_day):
|
|
457
457
|
year, week = map(int, week_str.split('-W'))
|
|
458
458
|
first_day_of_year = datetime(year, 1, 1)
|
|
459
|
-
|
|
460
|
-
days_to_add = (7 - day_of_week + 1) if day_of_week > 4 else (1 - day_of_week)
|
|
461
|
-
start_of_iso_week = first_day_of_year + datetime.timedelta(days=days_to_add)
|
|
459
|
+
first_weekday_of_year = first_day_of_year.weekday() # Monday is 0 and Sunday is 6
|
|
462
460
|
|
|
463
|
-
#
|
|
464
|
-
|
|
465
|
-
|
|
461
|
+
# Calculate days to adjust to the desired start day of the week
|
|
462
|
+
days_to_adjust = (start_day - 1 - first_weekday_of_year) % 7
|
|
463
|
+
start_of_iso_week = first_day_of_year + timedelta(days=days_to_adjust)
|
|
466
464
|
|
|
467
|
-
|
|
465
|
+
# Calculate the start of the desired week
|
|
466
|
+
start_of_week = start_of_iso_week + timedelta(weeks=week - 1)
|
|
467
|
+
return start_of_week
|
|
468
468
|
|
|
469
469
|
# Apply the function to each row in the specified week column
|
|
470
470
|
df['OBS'] = df[week_col].apply(lambda x: week_to_startdate(x, start_day)).dt.strftime('%d/%m/%Y')
|
|
@@ -1334,7 +1334,7 @@ class datapull:
|
|
|
1334
1334
|
print(" - Usage: pull_ons_data(series_list, week_commencing)")
|
|
1335
1335
|
print(" - Example: pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')")
|
|
1336
1336
|
|
|
1337
|
-
print("\n4.
|
|
1337
|
+
print("\n4. pull_oecd")
|
|
1338
1338
|
print(" - Description: Fetch macroeconomic data from OECD and other sources for a specified country.")
|
|
1339
1339
|
print(" - Usage: pull_macro(country='GBR', week_commencing='mon')")
|
|
1340
1340
|
print(" - Example: pull_macro('GBR', 'mon')")
|
|
@@ -2108,11 +2108,17 @@ class datapull:
|
|
|
2108
2108
|
return df_final_combined
|
|
2109
2109
|
|
|
2110
2110
|
def pull_weather(self, week_commencing, country) -> pd.DataFrame:
|
|
2111
|
+
import pandas as pd
|
|
2112
|
+
import urllib.request
|
|
2113
|
+
from datetime import datetime
|
|
2114
|
+
import requests
|
|
2115
|
+
from geopy.geocoders import Nominatim
|
|
2116
|
+
|
|
2111
2117
|
# Week commencing dictionary
|
|
2112
2118
|
day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
2113
2119
|
|
|
2114
2120
|
# Country dictionary
|
|
2115
|
-
country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada"}
|
|
2121
|
+
country_dict = {"AUS": "AU__ASOS", "GBR": "GB__ASOS", "USA": "USCRN", "DEU": "DE__ASOS", "CAN": "Canada", "ZAF": "ZA__ASOS"}
|
|
2116
2122
|
|
|
2117
2123
|
# Function to flatten a list of nested lists into a list
|
|
2118
2124
|
def flatten_list(nested_list):
|
|
@@ -2180,8 +2186,17 @@ class datapull:
|
|
|
2180
2186
|
"&stations=CWIL", "&stations=CXWB", "&stations=CYZS",
|
|
2181
2187
|
"&stations=CWJC", "&stations=CYFB", "&stations=CWUW"])
|
|
2182
2188
|
|
|
2189
|
+
elif country == "ZA__ASOS":
|
|
2190
|
+
cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
|
|
2191
|
+
stations = []
|
|
2192
|
+
|
|
2193
|
+
for city in cities:
|
|
2194
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2195
|
+
location = geolocator.geocode(city)
|
|
2196
|
+
stations.append(f"&latitude={location.latitude}&longitude={location.longitude}")
|
|
2197
|
+
|
|
2183
2198
|
# Temperature
|
|
2184
|
-
if country
|
|
2199
|
+
if country in ["GB__ASOS", "AU__ASOS", "DE__ASOS", "FR__ASOS"]:
|
|
2185
2200
|
# We start by making a data frame of the following weather stations
|
|
2186
2201
|
station_query = ''.join(stations)
|
|
2187
2202
|
|
|
@@ -2290,6 +2305,76 @@ class datapull:
|
|
|
2290
2305
|
"min_temp_c": "avg_min_temp_c",
|
|
2291
2306
|
"mean_temp_c": "avg_mean_temp_c",
|
|
2292
2307
|
"precip_in": "avg_mean_perc"}, inplace=True)
|
|
2308
|
+
elif country == "ZA__ASOS":
|
|
2309
|
+
weather_data_list = []
|
|
2310
|
+
|
|
2311
|
+
for city in cities:
|
|
2312
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2313
|
+
location = geolocator.geocode(city)
|
|
2314
|
+
url = "https://archive-api.open-meteo.com/v1/archive"
|
|
2315
|
+
|
|
2316
|
+
params = {
|
|
2317
|
+
"latitude": location.latitude,
|
|
2318
|
+
"longitude": location.longitude,
|
|
2319
|
+
"start_date": formatted_date,
|
|
2320
|
+
"end_date": today.strftime("%Y-%m-%d"),
|
|
2321
|
+
"daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
|
|
2322
|
+
"timezone": "auto"
|
|
2323
|
+
}
|
|
2324
|
+
|
|
2325
|
+
response = requests.get(url, params=params)
|
|
2326
|
+
response_data = response.json()
|
|
2327
|
+
|
|
2328
|
+
daily_data = response_data["daily"]
|
|
2329
|
+
dates = daily_data["time"]
|
|
2330
|
+
|
|
2331
|
+
data = pd.DataFrame({
|
|
2332
|
+
"day": dates,
|
|
2333
|
+
"max_temp_f": daily_data["temperature_2m_max"],
|
|
2334
|
+
"min_temp_f": daily_data["temperature_2m_min"],
|
|
2335
|
+
"precip_in": daily_data["precipitation_sum"]
|
|
2336
|
+
})
|
|
2337
|
+
data["city"] = city
|
|
2338
|
+
weather_data_list.append(data)
|
|
2339
|
+
|
|
2340
|
+
weather = pd.concat(weather_data_list)
|
|
2341
|
+
|
|
2342
|
+
# Convert the date column to a Date type
|
|
2343
|
+
weather["day"] = pd.to_datetime(weather["day"])
|
|
2344
|
+
|
|
2345
|
+
# Replace None values
|
|
2346
|
+
weather["max_temp_f"].replace("None", 0, inplace=True)
|
|
2347
|
+
weather["min_temp_f"].replace("None", 0, inplace=True)
|
|
2348
|
+
weather["precip_in"].replace("None", 0, inplace=True)
|
|
2349
|
+
|
|
2350
|
+
weather[["max_temp_f", "min_temp_f", "precip_in"]] = weather[["max_temp_f", "min_temp_f", "precip_in"]].apply(pd.to_numeric)
|
|
2351
|
+
|
|
2352
|
+
# Estimate mean temperature
|
|
2353
|
+
weather["mean_temp_f"] = (weather["max_temp_f"] + weather["min_temp_f"]) / 2
|
|
2354
|
+
|
|
2355
|
+
# Convert Fahrenheit to Celsius for max_temp_f
|
|
2356
|
+
weather["max_temp_c"] = (weather["max_temp_f"] - 32) * 5 / 9
|
|
2357
|
+
|
|
2358
|
+
# Convert Fahrenheit to Celsius for min_temp_f
|
|
2359
|
+
weather["min_temp_c"] = (weather["min_temp_f"] - 32) * 5 / 9
|
|
2360
|
+
|
|
2361
|
+
# Convert Fahrenheit to Celsius for mean_temp_f
|
|
2362
|
+
weather["mean_temp_c"] = (weather["mean_temp_f"] - 32) * 5 / 9
|
|
2363
|
+
|
|
2364
|
+
# Determine the starting chosen day for each date
|
|
2365
|
+
weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
|
|
2366
|
+
|
|
2367
|
+
# Group by week_starting and summarize
|
|
2368
|
+
numeric_columns = weather.select_dtypes(include='number').columns
|
|
2369
|
+
weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
|
|
2370
|
+
weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
|
|
2371
|
+
"min_temp_f": "avg_min_temp_f",
|
|
2372
|
+
"mean_temp_f": "avg_mean_temp_f",
|
|
2373
|
+
"max_temp_c": "avg_max_temp_c",
|
|
2374
|
+
"min_temp_c": "avg_min_temp_c",
|
|
2375
|
+
"mean_temp_c": "avg_mean_temp_c",
|
|
2376
|
+
"precip_in": "avg_mean_perc"}, inplace=True)
|
|
2377
|
+
|
|
2293
2378
|
else:
|
|
2294
2379
|
# We start by making a data frame of the following weather stations
|
|
2295
2380
|
station_query = ''.join(stations)
|
|
@@ -2563,8 +2648,57 @@ class datapull:
|
|
|
2563
2648
|
# Change index to datetime
|
|
2564
2649
|
weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
|
|
2565
2650
|
|
|
2651
|
+
elif country == "ZA__ASOS":
|
|
2652
|
+
cities = ["Johannesburg", "Cape Town", "Durban", "Pretoria"]
|
|
2653
|
+
start_date = formatted_date
|
|
2654
|
+
end_date = today.strftime("%Y-%m-%d")
|
|
2655
|
+
|
|
2656
|
+
weather_data_list = []
|
|
2657
|
+
|
|
2658
|
+
for city in cities:
|
|
2659
|
+
geolocator = Nominatim(user_agent="MyApp")
|
|
2660
|
+
location = geolocator.geocode(city)
|
|
2661
|
+
url = "https://archive-api.open-meteo.com/v1/archive"
|
|
2662
|
+
|
|
2663
|
+
params = {
|
|
2664
|
+
"latitude": location.latitude,
|
|
2665
|
+
"longitude": location.longitude,
|
|
2666
|
+
"start_date": start_date,
|
|
2667
|
+
"end_date": end_date,
|
|
2668
|
+
"daily": "precipitation_sum",
|
|
2669
|
+
"timezone": "auto"
|
|
2670
|
+
}
|
|
2671
|
+
|
|
2672
|
+
response = requests.get(url, params=params)
|
|
2673
|
+
response_data = response.json()
|
|
2674
|
+
|
|
2675
|
+
daily_data = response_data["daily"]["precipitation_sum"]
|
|
2676
|
+
dates = response_data["daily"]["time"]
|
|
2677
|
+
|
|
2678
|
+
data = pd.DataFrame({"date": dates, "rainfall": daily_data})
|
|
2679
|
+
data["city"] = city
|
|
2680
|
+
|
|
2681
|
+
weather_data_list.append(data)
|
|
2682
|
+
|
|
2683
|
+
# Combine all city data into a single data frame
|
|
2684
|
+
all_weather_data = pd.concat(weather_data_list)
|
|
2685
|
+
|
|
2686
|
+
# Convert the date column to a Date type
|
|
2687
|
+
all_weather_data["date"] = pd.to_datetime(all_weather_data["date"])
|
|
2688
|
+
|
|
2689
|
+
# Set week commencing col up
|
|
2690
|
+
all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
|
|
2691
|
+
|
|
2692
|
+
# Group by week_starting and summarize
|
|
2693
|
+
numeric_columns = all_weather_data.select_dtypes(include='number').columns
|
|
2694
|
+
weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
|
|
2695
|
+
weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
|
|
2696
|
+
|
|
2697
|
+
# Change index to datetime
|
|
2698
|
+
weekly_avg_rain.index = pd.to_datetime(weekly_avg_rain.index)
|
|
2699
|
+
|
|
2566
2700
|
# Merge the dataframes
|
|
2567
|
-
if country
|
|
2701
|
+
if country in ["AU__ASOS", "DE__ASOS", "FR__ASOS", "GB__ASOS", "ZA__ASOS"]:
|
|
2568
2702
|
merged_df = weekly_avg_rain.merge(weekly_avg_temp, on="week_starting")
|
|
2569
2703
|
else:
|
|
2570
2704
|
merged_df = weekly_avg_temp
|
|
@@ -2574,4 +2708,4 @@ class datapull:
|
|
|
2574
2708
|
|
|
2575
2709
|
final_weather = ims_proc.rename_cols(merged_df, 'seas_')
|
|
2576
2710
|
|
|
2577
|
-
return final_weather
|
|
2711
|
+
return final_weather
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|