imsciences 0.6.0.1__py3-none-any.whl → 0.6.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +220 -174
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/METADATA +1 -1
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/RECORD +5 -5
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/WHEEL +0 -0
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
@@ -1,6 +1,5 @@
 import pandas as pd
 import calendar
-import requests
 import os
 import plotly.express as px
 import plotly.graph_objs as go
@@ -18,7 +17,8 @@ import requests_cache
 import urllib.request
 import requests
 from geopy.geocoders import Nominatim
-
+import subprocess
+import json
 
 class dataprocessing:
 
@@ -180,7 +180,16 @@ class dataprocessing:
         print(" - Description: Remove zero values in a specified column.")
         print(" - Usage: remove_zero_values(self, data_frame, column_to_filter)")
         print(" - Example: remove_zero_values(None, df, 'Funeral_Delivery')")
-
+
+        print("\n32. upgrade all packages")
+        print(" - Description: Upgrades all packages.")
+        print(" - Usage: upgrade_outdated_packages()")
+        print(" - Example: upgrade_outdated_packages()")
+
+        print("\n33. Convert Mixed Formats Dates")
+        print(" - Description: Convert a mix of US and UK dates to datetime.")
+        print(" - Usage: convert_mixed_formats_dates(df, datecol)")
+        print(" - Example: convert_mixed_formats_dates(df, 'OBS')")
 
     def get_wd_levels(self, levels):
         """
@@ -598,10 +607,6 @@ class dataprocessing:
         )
         return df
 
-        # Apply the fix to the specified column
-        df[date_col] = df[date_col].apply(lambda x: fix_date(x) if not pd.isnull(x) else x)
-        return df
-
     def combine_sheets(self, all_sheets):
         """
         Combines multiple DataFrames from a dictionary into a single DataFrame.
@@ -1194,6 +1199,69 @@ class dataprocessing:
 
         return data_frame.loc[~(data_frame[column_to_filter] ==0)]
 
+    def upgrade_outdated_packages(self):
+        try:
+            # Get all installed packages
+            installed_packages_result = subprocess.run("pip list --format=json", shell=True, capture_output=True, text=True)
+            installed_packages = json.loads(installed_packages_result.stdout)
+
+            # Get the list of outdated packages
+            outdated_packages_result = subprocess.run("pip list --outdated --format=json", shell=True, capture_output=True, text=True)
+            outdated_packages = json.loads(outdated_packages_result.stdout)
+
+            # Create a set of outdated package names for quick lookup
+            outdated_package_names = {pkg['name'] for pkg in outdated_packages}
+
+            # Upgrade only outdated packages
+            for package in installed_packages:
+                package_name = package['name']
+                if package_name in outdated_package_names:
+                    try:
+                        print(f"Upgrading package: {package_name}")
+                        upgrade_result = subprocess.run(f"pip install --upgrade {package_name}", shell=True, capture_output=True, text=True)
+                        if upgrade_result.returncode == 0:
+                            print(f"Successfully upgraded {package_name}")
+                        else:
+                            print(f"Failed to upgrade {package_name}: {upgrade_result.stderr}")
+                    except Exception as e:
+                        print(f"An error occurred while upgrading {package_name}: {e}")
+                else:
+                    print(f"{package_name} is already up to date")
+        except Exception as e:
+            print(f"An error occurred during the upgrade process: {e}")
+
+    def convert_mixed_formats_dates(self, df, column_name):
+        # Convert initial dates to datetime with coercion to handle errors
+        df[column_name] = pd.to_datetime(df[column_name], errors='coerce')
+        df[column_name] = df[column_name].astype(str)
+        corrected_dates = []
+
+        for date_str in df[column_name]:
+            date_str = date_str.replace('-', '').replace('/', '')
+            if len(date_str) == 8:
+                year = date_str[:4]
+                month = date_str[4:6]
+                day = date_str[6:8]
+                if int(day) <= 12:
+                    # Swap month and day
+                    corrected_date_str = f"{year}-{day}-{month}"
+                else:
+                    corrected_date_str = f"{year}-{month}-{day}"
+                # Convert to datetime
+                corrected_date = pd.to_datetime(corrected_date_str, errors='coerce')
+            else:
+                corrected_date = pd.to_datetime(date_str, errors='coerce')
+
+            corrected_dates.append(corrected_date)
+
+        # Check length of the corrected_dates list
+        if len(corrected_dates) != len(df):
+            raise ValueError("Length of corrected_dates does not match the original DataFrame")
+
+        # Assign the corrected dates back to the DataFrame
+        df[column_name] = corrected_dates
+        return df
+
 
 
 
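The two helpers added above are self-contained: upgrade_outdated_packages shells out to `pip list --outdated --format=json` and upgrades each outdated package in turn (hence the new module-level subprocess and json imports), while convert_mixed_formats_dates re-parses each date and swaps month and day whenever the parsed day is 12 or less, preferring the day-first (UK) reading of ambiguous dates. A minimal usage sketch, assuming the class is importable as imsciences.datafunctions.dataprocessing and using an illustrative 'OBS' column:

import pandas as pd
from imsciences.datafunctions import dataprocessing

ims = dataprocessing()
df = pd.DataFrame({"OBS": ["05/12/2023", "12/25/2023"]})  # hypothetical US-formatted input

out = ims.convert_mixed_formats_dates(df, "OBS")
# "05/12/2023" first parses month-first as 12 May 2023; the parsed day (12)
# is <= 12, so month and day are swapped and the row becomes 2023-12-05.
# "12/25/2023" parses as 25 December; a day of 25 is unambiguous and is kept.
print(out["OBS"].tolist())   # [Timestamp('2023-12-05'), Timestamp('2023-12-25')]

Note that when both fields are 12 or less the swap is only a heuristic: a genuinely month-first date such as 05/12 will also be re-read day-first.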
@@ -1483,14 +1551,13 @@ class datapull:
 
         return ons_df_final
 
-    def pull_macro(self, country
-
+    def pull_macro(self, country: str = "GBR", week_commencing: str = "mon"):
         # Change country input to list
         countries_list = [country]
-
+
         # Check if the data wants to be inputted at any other week commencing date
-        day_dict = {"mon"
-
+        day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+
         # Two useful functions for quarterly data
         # Define a function to get quarterly data
         def get_quarter(p_date: datetime.date) -> int:
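The day_dict mapping feeds the week-commencing shift used throughout the module: each date is moved back by (weekday - target) % 7 days, which always lands on the most recent occurrence of the requested weekday. A standalone sketch of the arithmetic:

import pandas as pd

day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

d = pd.Timestamp("2023-06-15")                # a Thursday, so d.weekday() == 3
shift = (d.weekday() - day_dict["mon"]) % 7   # (3 - 0) % 7 == 3
print(d - pd.Timedelta(days=shift))           # 2023-06-12, the Monday of that week

shift = (d.weekday() - day_dict["sun"]) % 7   # (3 - 6) % 7 == 4
print(d - pd.Timedelta(days=shift))           # 2023-06-11, the Sunday commencing that week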
@@ -1500,39 +1567,32 @@ class datapull:
         def get_last_day_of_the_quarter(p_date: datetime.date):
             quarter = get_quarter(p_date)
             return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)
-
+
         # For the monthly data
-        data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                                                     'PRINTO01'],
-                                                                     measure = ['IXOBSA','IXNSA','IXNB','STSA','ST','GPSA','GY'], frequency = 'Q',startDate = '2015-01')
-
-        data_Q=data_Q.stack(level=[0,-1,-2]).reset_index()
-
-        # create a data frame dictionary to store your monthly data frames
-        DataFrameDict_M = {elem : pd.DataFrame() for elem in countries_list}
+        data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+                                                                     subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+                                                                              'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+                                                                              'PRINTO01'],
+                                                                     measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+                                                                     frequency='M', startDate='2015-01')
+        data_M = data_M.stack(level=[0, -1, -2]).reset_index()
+
+        data_Q, subjects_Q, measures_Q = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+                                                                     subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+                                                                              'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+                                                                              'PRINTO01'],
+                                                                     measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+                                                                     frequency='Q', startDate='2015-01')
+
+        data_Q = data_Q.stack(level=[0, -1, -2]).reset_index()
+
+        # Create a data frame dictionary to store your monthly data frames
+        DataFrameDict_M = {elem: pd.DataFrame() for elem in countries_list}
         for key in DataFrameDict_M.keys():
             DataFrameDict_M[key] = data_M[:][data_M.country == key]
 
-        #
-        DataFrameDict_Q = {elem
+        # Create a data frame dictionary to store your quarterly data frames
+        DataFrameDict_Q = {elem: pd.DataFrame() for elem in countries_list}
         for key in DataFrameDict_Q.keys():
             DataFrameDict_Q[key] = data_Q[:][data_Q.country == key]
 
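get_last_day_of_the_quarter relies on integer arithmetic: for quarter q, 3 * q // 12 adds a year only when q is 4, and 3 * q % 12 + 1 is the first month of the following quarter, so stepping back one day from the first of that month lands on the quarter end. A sketch with an assumed get_quarter body (the hunk only shows its signature):

from datetime import datetime
import pandas as pd

def get_quarter(p_date) -> int:
    # Assumed implementation; the real body is outside this hunk.
    return (p_date.month - 1) // 3 + 1

def get_last_day_of_the_quarter(p_date):
    quarter = get_quarter(p_date)
    return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)

print(get_last_day_of_the_quarter(datetime(2023, 5, 20)))   # 2023-06-30 (end of Q2)
print(get_last_day_of_the_quarter(datetime(2023, 11, 2)))   # 2023-12-31 (end of Q4)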
@@ -1540,41 +1600,40 @@ class datapull:
         countries_df_list_M = []
         for i in countries_list:
             df = pd.DataFrame(DataFrameDict_M[i])
-            df.rename(columns={0:'Values'},inplace=True)
-            df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+            df.rename(columns={0: 'Values'}, inplace=True)
+            df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
             countries_df_list_M.append(df)
 
         # Create a quarterly list of the dataframes to iterate through
         countries_df_list_Q = []
         for i in countries_list:
             df = pd.DataFrame(DataFrameDict_Q[i])
-            df.rename(columns={0:'Values'},inplace=True)
-            df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+            df.rename(columns={0: 'Values'}, inplace=True)
+            df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
             countries_df_list_Q.append(df)
 
-        combined_countries_df_list = list(zip(countries_df_list_M,countries_df_list_Q))
+        combined_countries_df_list = list(zip(countries_df_list_M, countries_df_list_Q))
 
         # Loop through and create dataframes for every country
         for index, data in enumerate(combined_countries_df_list):
-
             # Find country being extracted
-            country = countries_list[index]
+            country = countries_list[index]
             print(country)
 
             # For consumer confidence
             # For countries with no data
-            if country in ['CAN','IND','NOR']:
+            if country in ['CAN', 'IND', 'NOR']:
                 Consumer_Confidence_Index_df_M = pd.DataFrame()
                 Consumer_Confidence_Index_df_Q = pd.DataFrame()
-            # For countries with quarterly data
+            # For countries with quarterly data
             elif country in []:
                 Consumer_Confidence_Index_df_Q = data[1]['CSCICP03']['IXNSA']
-                Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index',inplace=True)
+                Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index', inplace=True)
                 Consumer_Confidence_Index_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Confidence_Index_df_M = data[0]['CSCICP03']['IXNSA']
-                Consumer_Confidence_Index_df_M.rename('consumer_confidence_index',inplace=True)
+                Consumer_Confidence_Index_df_M.rename('consumer_confidence_index', inplace=True)
                 Consumer_Confidence_Index_df_Q = pd.DataFrame()
 
             # For consumer prices for COST OF LIVING
@@ -1583,14 +1642,14 @@ class datapull:
             Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
             Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Consumer_Price_Index_Cost_Of_Living_df_Q = data[1]['CPALTT01']['IXNB']
-                Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living',inplace=True)
+                Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living', inplace=True)
                 Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Price_Index_Cost_Of_Living_df_M = data[0]['CPALTT01']['IXNB']
-                Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living',inplace=True)
+                Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living', inplace=True)
                 Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
 
             # For consumer prices FOR INFLATION
@@ -1599,125 +1658,113 @@ class datapull:
             Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
             Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Consumer_Price_Index_Inflation_df_Q = data[1]['CPALTT01']['GY']
-                Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation',inplace=True)
+                Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation', inplace=True)
                 Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Price_Index_Inflation_df_M = data[0]['CPALTT01']['GY']
-                Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation',inplace=True)
+                Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation', inplace=True)
                 Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
 
-            # For GDP Index Smoothed
+            # For GDP Index Smoothed
             # For countries with no data
-            if country in ['NLD','CHE','NZL','SWE','NOR']:
+            if country in ['NLD', 'CHE', 'NZL', 'SWE', 'NOR']:
                 GDP_Index_Smoothed_df_M = pd.DataFrame()
                 GDP_Index_Smoothed_df_Q = pd.DataFrame()
             # For countries with quarterly data
             elif country in []:
                 GDP_Index_Smoothed_df_Q = data[1]['LORSGPRT']['STSA']
-                GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed',inplace=True)
+                GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed', inplace=True)
                 GDP_Index_Smoothed_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 GDP_Index_Smoothed_df_M = data[0]['LORSGPRT']['STSA']
-                GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed',inplace=True)
+                GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed', inplace=True)
                 GDP_Index_Smoothed_df_Q = pd.DataFrame()
 
             # For Harmonised Unemployment Index
             # For countries with no data
-            if country in ['IND','CHE','ZAF','CHN']:
+            if country in ['IND', 'CHE', 'ZAF', 'CHN']:
                 Harmonised_Unemployment_Index_df_M = pd.DataFrame()
                 Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
             # For countries with quarterly data
             elif country in ['NZL']:
                 Harmonised_Unemployment_Index_df_Q = data[1]['LRHUTTTT']['STSA']
-                Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index',inplace=True)
+                Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index', inplace=True)
                 Harmonised_Unemployment_Index_df_M = pd.DataFrame()
             # For countries with monthly data
-            else:
+            else:
                 Harmonised_Unemployment_Index_df_M = data[0]['LRHUTTTT']['STSA']
-                Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index',inplace=True)
+                Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index', inplace=True)
                 Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
 
-            # For hourly earnings index manufacturing
+            # For hourly earnings index manufacturing
            # For countries with no data
-            if country in ['IND','CHE','ZAF','CHN']:
+            if country in ['IND', 'CHE', 'ZAF', 'CHN']:
                 Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
                 Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['FRA','DEU','ESP','AUS','NZL','KOR','NOR']:
+            elif country in ['FRA', 'DEU', 'ESP', 'AUS', 'NZL', 'KOR', 'NOR']:
                 Hourly_Earnings_Index_Manufacturing_df_Q = data[1]['LCEAMN01']['IXOBSA']
-                Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing',inplace=True)
+                Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing', inplace=True)
                 Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Hourly_Earnings_Index_Manufacturing_df_M = data[0]['LCEAMN01']['IXOBSA']
-                Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing',inplace=True)
+                Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing', inplace=True)
                 Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
 
-            # For hourly earnings index private
-            # Hourly_Earnings_Index_Private_df = data['GBR']['LCEAPR']['IXOBSA']
-            # Hourly_Earnings_Index_Private_df.rename('Hourly Earnings Index Private',inplace=True)
-
             # For Short Term Interest Rate
             # For countries with no data
             if country in []:
                 Short_Term_Interest_Rate_df_M = pd.DataFrame()
                 Short_Term_Interest_Rate_df_Q = pd.DataFrame()
-            # For countries with quarterly data
+            # For countries with quarterly data
             elif country in []:
                 Short_Term_Interest_Rate_df_Q = data[1]['IR3TIB01']['ST']
-                Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate',inplace=True)
+                Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate', inplace=True)
                 Short_Term_Interest_Rate_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Short_Term_Interest_Rate_df_M = data[0]['IR3TIB01']['ST']
-                Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate',inplace=True)
+                Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate', inplace=True)
                 Short_Term_Interest_Rate_df_Q = pd.DataFrame()
 
             # For Industrial Product Growth on Previous Period
             # For countries with no data
-            if country in ['ZAF','CHN']:
+            if country in ['ZAF', 'CHN']:
                 Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
                 Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Industrial_Product_Growth_on_Previous_Period_df_Q = data[1]['PRINTO01']['GPSA']
-                Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period',inplace=True)
+                Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period', inplace=True)
                 Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Industrial_Product_Growth_on_Previous_Period_df_M = data[0]['PRINTO01']['GPSA']
-                Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period',inplace=True)
+                Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period', inplace=True)
                 Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
 
             # For Industrial Production Index
             # For countries with no data
-            if country in ['ZAF','CHN']:
+            if country in ['ZAF', 'CHN']:
                 Industrial_Production_Index_df_M = pd.DataFrame()
                 Industrial_Production_Index_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Industrial_Production_Index_df_Q = data[1]['PRINTO01']['IXOBSA']
-                Industrial_Production_Index_df_Q.rename('industrial_production_index',inplace=True)
+                Industrial_Production_Index_df_Q.rename('industrial_production_index', inplace=True)
                 Industrial_Production_Index_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Industrial_Production_Index_df_M = data[0]['PRINTO01']['IXOBSA']
-                Industrial_Production_Index_df_M.rename('industrial_production_index',inplace=True)
+                Industrial_Production_Index_df_M.rename('industrial_production_index', inplace=True)
                 Industrial_Production_Index_df_Q = pd.DataFrame()
 
-            #
-            # USD_GBP_Exchange_Rate_df = data['GBR']['PRINTO01']['IXOBSA']
-            # USD_GBP_Exchange_Rate_df.rename('Industrial Production Index',inplace=True)
-
-
-
-
-
-            # Create monthly macroeconomic dataframe
+            # Create monthly macroeconomic dataframe
             all_dfs_list_M = [Consumer_Confidence_Index_df_M,
                               Consumer_Price_Index_Cost_Of_Living_df_M,
                               Consumer_Price_Index_Inflation_df_M,
@@ -1728,11 +1775,11 @@ class datapull:
                               Industrial_Product_Growth_on_Previous_Period_df_M,
                               Industrial_Production_Index_df_M]
 
-            # Check if any dataframes are empty and if there are remove them
-            all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
-            cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M,axis=1)
+            # Check if any dataframes are empty and if there are remove them
+            all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
+            cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M, axis=1)
 
-            # Create quarterly macroeconomic dataframe
+            # Create quarterly macroeconomic dataframe
             all_dfs_list_Q = [Consumer_Confidence_Index_df_Q,
                               Consumer_Price_Index_Cost_Of_Living_df_Q,
                               Consumer_Price_Index_Inflation_df_Q,
@@ -1743,38 +1790,38 @@ class datapull:
                               Industrial_Product_Growth_on_Previous_Period_df_Q,
                               Industrial_Production_Index_df_Q]
 
-            # Check if any dataframes are empty and if there are remove them
-            all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
+            # Check if any dataframes are empty and if there are remove them
+            all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
             if all_dfs_list_Q != []:
-                macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q,axis=1)
-            else:
-                macroeconomic_monthly_df_Q
+                macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q, axis=1)
+            else:
+                macroeconomic_monthly_df_Q = pd.DataFrame()
 
             # For USD GBP Exchange Rate
             # If it's the UK add this series else don't
             if countries_list[index] == 'GBR':
-                USD_GBP_Exchange_Rate_df = pd.read_csv(
+                USD_GBP_Exchange_Rate_df = pd.read_csv(
+                    'https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/CCUS.' + countries_list[index] + '.M/OECD?contentType=csv')
                 USD_GBP_Exchange_Rate_df.head()
-                USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df,values='Value',index='TIME',
+                USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df, values='Value', index='TIME',
+                                                                columns='Subject')
                 USD_GBP_Exchange_Rate_df_pivot_final = USD_GBP_Exchange_Rate_df_pivot.loc["2015-01":]
-                USD_GBP_Exchange_Rate_df_pivot_final.rename(
+                USD_GBP_Exchange_Rate_df_pivot_final.rename(
+                    columns={'Currency exchange rates, monthly average': 'usd_gbp_exchange_rate'}, inplace=True)
 
                 # Create final monthly dataframe
-                macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M,USD_GBP_Exchange_Rate_df_pivot_final],axis=1)
+                macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M, USD_GBP_Exchange_Rate_df_pivot_final], axis=1)
             else:
                 # Create final monthly dataframe
                 macroeconomic_monthly_df_M = cif_Macroeconomic_df_M
 
-
-
-
-            # Create the final W/C Sunday dataframe
+            # Create the final W/C Sunday dataframe
             # For monthly data
-            macroeconomic_monthly_df_M['Date']=macroeconomic_monthly_df_M.index
+            macroeconomic_monthly_df_M['Date'] = macroeconomic_monthly_df_M.index
             df_M = macroeconomic_monthly_df_M.set_index(pd.to_datetime(macroeconomic_monthly_df_M['Date'])).drop(columns='Date')
-            df_M.fillna(method="ffill",inplace=True)
+            df_M.fillna(method="ffill", inplace=True)
             df_M.reset_index(inplace=True)
-
+
             daily_records = []
             # Iterate over each row in the DataFrame
             for _, row in df_M.iterrows():
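The pd.DataFrame() fallback in the new else branch matters because pd.concat refuses an empty list, which can happen here after every empty quarterly series is filtered out. The guard in isolation:

import pandas as pd

all_dfs_list_Q = []   # e.g. every quarterly series was empty and was filtered out

if all_dfs_list_Q != []:
    macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q, axis=1)
else:
    # pd.concat([]) raises ValueError: "No objects to concatenate",
    # so fall back to an empty frame that the downstream merge logic can skip.
    macroeconomic_monthly_df_Q = pd.DataFrame()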
@@ -1788,37 +1835,32 @@ class datapull:
 
             # Convert the list of daily records into a DataFrame
             daily_df = pd.DataFrame(daily_records)
-
-            # Extend dataframe to include the current data if needed
-            datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
-            extended_data = np.repeat([list(daily_df.iloc[-1,1:].values)],len(datelist),axis=0)
-            q = pd.Series(datelist,name="Date")
-            s = pd.DataFrame(extended_data,columns=list(df_M.columns[1:]))
-            extended_daily_df = pd.concat([q,s],axis=1)
-            extended_daily_df = daily_df.append(extended_daily_df, ignore_index=False)
-
-            # Create a week commencing column
-            extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
-            extended_daily_df['week_start'] = extended_daily_df["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-            extended_daily_df.drop("Date",axis=1,inplace=True)
-            extended_daily_df.rename(columns={'week_start':"Date"},inplace=True)
-
-            # Take a weekly average
-            macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
-
-
-
-
 
+            # Extend dataframe to include the current data if needed
+            datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+            extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+            q = pd.Series(datelist, name="Date")
+            s = pd.DataFrame(extended_data, columns=list(df_M.columns[1:]))
+            extended_daily_df = pd.concat([q, s], axis=1)
+            extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
 
+            # Create a week commencing column
+            extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+            extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+                lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+            extended_daily_df.drop("Date", axis=1, inplace=True)
+            extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
 
+            # Take a weekly average
+            macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
 
             # For quarterly data
             # If there are quarterly datasets
             if all_dfs_list_Q != []:
-                macroeconomic_monthly_df_Q['Date']=macroeconomic_monthly_df_Q.index
-                df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
-
+                macroeconomic_monthly_df_Q['Date'] = macroeconomic_monthly_df_Q.index
+                df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
+                    columns='Date')
+                df_Q.fillna(method="ffill", inplace=True)
                 df_Q.reset_index(inplace=True)
 
                 daily_records = []
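The move from daily_df.append(...) to pd.concat([daily_df, extended_daily_df], ...) tracks pandas itself: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. The equivalent pattern in isolation:

import pandas as pd

daily_df = pd.DataFrame({"Date": pd.to_datetime(["2024-01-01"]), "x": [1.0]})
extension = pd.DataFrame({"Date": pd.to_datetime(["2024-01-02"]), "x": [1.0]})

# On pandas >= 2.0, daily_df.append(extension) raises AttributeError;
# pd.concat is the drop-in replacement.
extended = pd.concat([daily_df, extension], ignore_index=False)
print(extended)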
@@ -1826,45 +1868,47 @@ class datapull:
                     year = row["Date"].year
                     month = row["Date"].month
                     day = row["Date"].day
-                    last_date = get_last_day_of_the_quarter(datetime(year,month,day).date())
-                    all_days = pd.date_range(row["Date"],last_date,freq="D")
+                    last_date = get_last_day_of_the_quarter(datetime(year, month, day).date())
+                    all_days = pd.date_range(row["Date"], last_date, freq="D")
 
                     # Create a new record for each day of the quarter
                     for day in all_days:
                         daily_row = row.copy()
-                        daily_row["Date"] = row["Date"].replace(day=day.day,month=day.month)
+                        daily_row["Date"] = row["Date"].replace(day=day.day, month=day.month)
                         daily_records.append(daily_row)
 
                 # Convert the list of daily records into a DataFrame
                 daily_df = pd.DataFrame(daily_records)
-
+
                 # Extend dataframe to include data up to today
-                datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
-                extended_data =
-                q = pd.Series(datelist,name="Date")
-                s = pd.DataFrame(extended_data,columns=list(df_Q.columns[1:]))
-                extended_daily_df = pd.concat([q,s],axis=1)
-                extended_daily_df =
-
+                datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+                extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+                q = pd.Series(datelist, name="Date")
+                s = pd.DataFrame(extended_data, columns=list(df_Q.columns[1:]))
+                extended_daily_df = pd.concat([q, s], axis=1)
+                extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
+
                 # Create a week commencing column
-                extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
-                extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
-
-                extended_daily_df.
-
+                extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+                extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+                    lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+                extended_daily_df.drop("Date", axis=1, inplace=True)
+                extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
 
                 # Take a weekly average
                 macroeconomic_weekly_df_Q = extended_daily_df.groupby('Date').mean()
 
             # Merge the two datasets together
             if all_dfs_list_Q != []:
-                macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q,left_index=True,
+                macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q, left_index=True,
+                                                                          right_index=True)
             # If there are no quarterly datasets
             else:
                 macroeconomic_weekly_df = macroeconomic_weekly_df_M
-
-            # Change
-            macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
-
+
+            # Change datetime format
+            macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
+
         macroeconomic_weekly_df.reset_index()
         macroeconomic_weekly_df.reset_index(drop=False, inplace=True)
         macroeconomic_weekly_df.rename(columns={'Date': 'OBS'}, inplace=True)
@@ -2050,12 +2094,11 @@ class datapull:
         country = country_dict[country]
 
         # Choose start and end dates
-        import datetime
         start_day = 1
         start_month = 1
         start_year = 2014
-        formatted_date = datetime
-        today = datetime.
+        formatted_date = datetime(start_year, start_month, start_day).strftime("%Y-%m-%d")
+        today = datetime.now()
         end_day = today.day
         end_month = today.month
         end_year = today.year
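Dropping the function-local `import datetime` is what lets the two rewritten lines work: a local import rebinds the name to the module, shadowing a class-style import, while the new code calls datetime(...) and datetime.now() directly (consistent with `from datetime import datetime` elsewhere in the file, an assumption based on the other calls in this diff). A minimal illustration of the pitfall:

from datetime import datetime

def broken():
    import datetime                 # rebinds the local name to the module...
    return datetime(2014, 1, 1)     # ...TypeError: 'module' object is not callable

def fixed():
    return datetime(2014, 1, 1).strftime("%Y-%m-%d")   # "2014-01-01"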
@@ -2150,7 +2193,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
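Selecting numeric columns before taking the group mean is the portable pattern on pandas >= 2.0, where GroupBy.mean() no longer silently drops non-numeric columns and instead raises. A standalone sketch (column names are illustrative); the same two-line fix is applied at each of the weather aggregation sites below:

import pandas as pd

weather = pd.DataFrame({
    "week_starting": pd.to_datetime(["2024-01-01", "2024-01-01"]),
    "station": ["LHR", "LGW"],       # a non-numeric column would make a bare mean() raise
    "mean_temp_f": [41.0, 43.0],
})

numeric_columns = weather.select_dtypes(include="number").columns
weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
print(weekly_avg_temp)               # mean_temp_f == 42.0 for week commencing 2024-01-01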
@@ -2209,7 +2253,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
@@ -2260,7 +2305,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
@@ -2282,7 +2328,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2318,7 +2363,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2337,7 +2383,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2373,7 +2418,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2392,7 +2438,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
        for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2428,7 +2473,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2447,7 +2493,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2483,7 +2528,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
-imsciences/datafunctions.py,sha256=
+imsciences/datafunctions.py,sha256=DKK03w47pFXl5xTPZ_0CdCmJP_IVwT1OLBVzK5-QjGU,133927
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.0.
-imsciences-0.6.0.
-imsciences-0.6.0.
-imsciences-0.6.0.
+imsciences-0.6.0.3.dist-info/METADATA,sha256=8_UQYWlXnPmBctZ1w9wGltM-DN-PQmovqiQ9ERxe7Ng,11571
+imsciences-0.6.0.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.0.3.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.0.3.dist-info/RECORD,,
{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/WHEEL
File without changes

{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.3.dist-info}/top_level.txt
File without changes