imsciences 0.6.0.1__py3-none-any.whl → 0.6.0.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- imsciences/datafunctions.py +184 -170
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/METADATA +1 -1
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/RECORD +5 -5
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/WHEEL +0 -0
- {imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
@@ -1,6 +1,5 @@
 import pandas as pd
 import calendar
-import requests
 import os
 import plotly.express as px
 import plotly.graph_objs as go
@@ -18,7 +17,8 @@ import requests_cache
 import urllib.request
 import requests
 from geopy.geocoders import Nominatim
-
+import subprocess
+import json
 
 class dataprocessing:
 
@@ -180,7 +180,11 @@ class dataprocessing:
         print(" - Description: Remove zero values in a specified column.")
         print(" - Usage: remove_zero_values(self, data_frame, column_to_filter)")
         print(" - Example: remove_zero_values(None, df, 'Funeral_Delivery')")
-
+
+        print("\n32. upgrade all packages")
+        print(" - Description: Upgrades all packages.")
+        print(" - Usage: upgrade_outdated_packages()")
+        print(" - Example: upgrade_outdated_packages()")
 
     def get_wd_levels(self, levels):
         """
@@ -1194,6 +1198,38 @@ class dataprocessing:
 
         return data_frame.loc[~(data_frame[column_to_filter] ==0)]
 
+    def upgrade_outdated_packages(self):
+        try:
+            # Get all installed packages
+            installed_packages_result = subprocess.run("pip list --format=json", shell=True, capture_output=True, text=True)
+            installed_packages = json.loads(installed_packages_result.stdout)
+
+            # Get the list of outdated packages
+            outdated_packages_result = subprocess.run("pip list --outdated --format=json", shell=True, capture_output=True, text=True)
+            outdated_packages = json.loads(outdated_packages_result.stdout)
+
+            # Create a set of outdated package names for quick lookup
+            outdated_package_names = {pkg['name'] for pkg in outdated_packages}
+
+            # Upgrade only outdated packages
+            for package in installed_packages:
+                package_name = package['name']
+                if package_name in outdated_package_names:
+                    try:
+                        print(f"Upgrading package: {package_name}")
+                        upgrade_result = subprocess.run(f"pip install --upgrade {package_name}", shell=True, capture_output=True, text=True)
+                        if upgrade_result.returncode == 0:
+                            print(f"Successfully upgraded {package_name}")
+                        else:
+                            print(f"Failed to upgrade {package_name}: {upgrade_result.stderr}")
+                    except Exception as e:
+                        print(f"An error occurred while upgrading {package_name}: {e}")
+                else:
+                    print(f"{package_name} is already up to date")
+        except Exception as e:
+            print(f"An error occurred during the upgrade process: {e}")
+
+
 
 
 
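Note: the new upgrade_outdated_packages method shells out to pip through subprocess. A minimal usage sketch (the import path and instantiation below are illustrative assumptions; only the class and method names come from this diff):

    from imsciences.datafunctions import dataprocessing  # hypothetical import path
    dp = dataprocessing()
    dp.upgrade_outdated_packages()  # runs `pip list --outdated`, then `pip install --upgrade` per package

Running pip as `sys.executable -m pip` instead of the bare `pip` command would pin the subprocess to the current interpreter's environment; the diff uses the plain command.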
@@ -1483,14 +1519,13 @@ class datapull:
 
         return ons_df_final
 
-    def pull_macro(self, country
-
+    def pull_macro(self, country: str = "GBR", week_commencing: str = "mon"):
         # Change country input to list
         countries_list = [country]
-
+
        # Check if the data wants to be inputted at any other week commencing date
-        day_dict = {"mon"
-
+        day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+
        # Two useful functions for quarterly data
        # Define a function to get quarterly data
         def get_quarter(p_date: datetime.date) -> int:
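Note: the restored signature defaults to GBR with weeks commencing Monday, and day_dict drives the modular weekday arithmetic reused throughout this file. A standalone sketch of that computation (pandas only; the example date is illustrative):

    import pandas as pd
    day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
    x = pd.Timestamp("2015-01-07")  # a Wednesday
    week_start = x - pd.Timedelta(days=(x.weekday() - day_dict["mon"]) % 7)
    print(week_start.date())  # 2015-01-05, the Monday commencing that week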
@@ -1500,39 +1535,32 @@
         def get_last_day_of_the_quarter(p_date: datetime.date):
             quarter = get_quarter(p_date)
             return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)
-
+
         # For the monthly data
-        data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                                                     'PRINTO01'],
-                                                                     measure = ['IXOBSA','IXNSA','IXNB','STSA','ST','GPSA','GY'], frequency = 'Q',startDate = '2015-01')
-
-        data_Q=data_Q.stack(level=[0,-1,-2]).reset_index()
-
-        # create a data frame dictionary to store your monthly data frames
-        DataFrameDict_M = {elem : pd.DataFrame() for elem in countries_list}
+        data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+                                                                     subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+                                                                              'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+                                                                              'PRINTO01'],
+                                                                     measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+                                                                     frequency='M', startDate='2015-01')
+        data_M = data_M.stack(level=[0, -1, -2]).reset_index()
+
+        data_Q, subjects_Q, measures_Q = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+                                                                     subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+                                                                              'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+                                                                              'PRINTO01'],
+                                                                     measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+                                                                     frequency='Q', startDate='2015-01')
+
+        data_Q = data_Q.stack(level=[0, -1, -2]).reset_index()
+
+        # Create a data frame dictionary to store your monthly data frames
+        DataFrameDict_M = {elem: pd.DataFrame() for elem in countries_list}
         for key in DataFrameDict_M.keys():
             DataFrameDict_M[key] = data_M[:][data_M.country == key]
 
-        #
-        DataFrameDict_Q = {elem
+        # Create a data frame dictionary to store your quarterly data frames
+        DataFrameDict_Q = {elem: pd.DataFrame() for elem in countries_list}
         for key in DataFrameDict_Q.keys():
             DataFrameDict_Q[key] = data_Q[:][data_Q.country == key]
 
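Note: the removed monthly pull ended with frequency='Q' and then stacked a data_Q frame this branch had never assigned; the rewrite issues one createDataFrameFromOECD call per frequency and stacks the matching result. A minimal sketch of the call shape, with argument names taken from the added lines (the `from cif import cif` import is an assumption):

    from cif import cif  # assumed import of the third-party OECD 'cif' client
    data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(
        countries=['GBR'], dsname='MEI',
        subject=['CSCICP03'], measure=['IXNSA'],
        frequency='M', startDate='2015-01')
    # Flatten the (country, subject, measure) column MultiIndex into rows
    data_M = data_M.stack(level=[0, -1, -2]).reset_index()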
@@ -1540,41 +1568,40 @@ class datapull:
         countries_df_list_M = []
         for i in countries_list:
             df = pd.DataFrame(DataFrameDict_M[i])
-            df.rename(columns={0:'Values'},inplace=True)
-            df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+            df.rename(columns={0: 'Values'}, inplace=True)
+            df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
             countries_df_list_M.append(df)
 
         # Create a quarterly list of the dataframes to iterate through
         countries_df_list_Q = []
         for i in countries_list:
             df = pd.DataFrame(DataFrameDict_Q[i])
-            df.rename(columns={0:'Values'},inplace=True)
-            df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+            df.rename(columns={0: 'Values'}, inplace=True)
+            df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
             countries_df_list_Q.append(df)
 
-        combined_countries_df_list = list(zip(countries_df_list_M,countries_df_list_Q))
+        combined_countries_df_list = list(zip(countries_df_list_M, countries_df_list_Q))
 
         # Loop through and create dataframes for every country
         for index, data in enumerate(combined_countries_df_list):
-
             # Find country being extracted
-            country = countries_list[index]
+            country = countries_list[index]
             print(country)
 
             # For consumer confidence
             # For countries with no data
-            if country in ['CAN','IND','NOR']:
+            if country in ['CAN', 'IND', 'NOR']:
                 Consumer_Confidence_Index_df_M = pd.DataFrame()
                 Consumer_Confidence_Index_df_Q = pd.DataFrame()
-            # For countries with quarterly data
+            # For countries with quarterly data
             elif country in []:
                 Consumer_Confidence_Index_df_Q = data[1]['CSCICP03']['IXNSA']
-                Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index',inplace=True)
+                Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index', inplace=True)
                 Consumer_Confidence_Index_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Confidence_Index_df_M = data[0]['CSCICP03']['IXNSA']
-                Consumer_Confidence_Index_df_M.rename('consumer_confidence_index',inplace=True)
+                Consumer_Confidence_Index_df_M.rename('consumer_confidence_index', inplace=True)
                 Consumer_Confidence_Index_df_Q = pd.DataFrame()
 
             # For consumer prices for COST OF LIVING
@@ -1583,14 +1610,14 @@ class datapull:
                 Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
                 Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Consumer_Price_Index_Cost_Of_Living_df_Q = data[1]['CPALTT01']['IXNB']
-                Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living',inplace=True)
+                Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living', inplace=True)
                 Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Price_Index_Cost_Of_Living_df_M = data[0]['CPALTT01']['IXNB']
-                Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living',inplace=True)
+                Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living', inplace=True)
                 Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
 
             # For consumer prices FOR INFLATION
@@ -1599,125 +1626,113 @@ class datapull:
                 Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
                 Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Consumer_Price_Index_Inflation_df_Q = data[1]['CPALTT01']['GY']
-                Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation',inplace=True)
+                Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation', inplace=True)
                 Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Consumer_Price_Index_Inflation_df_M = data[0]['CPALTT01']['GY']
-                Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation',inplace=True)
+                Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation', inplace=True)
                 Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
 
-            # For GDP Index Smoothed
+            # For GDP Index Smoothed
             # For countries with no data
-            if country in ['NLD','CHE','NZL','SWE','NOR']:
+            if country in ['NLD', 'CHE', 'NZL', 'SWE', 'NOR']:
                 GDP_Index_Smoothed_df_M = pd.DataFrame()
                 GDP_Index_Smoothed_df_Q = pd.DataFrame()
             # For countries with quarterly data
             elif country in []:
                 GDP_Index_Smoothed_df_Q = data[1]['LORSGPRT']['STSA']
-                GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed',inplace=True)
+                GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed', inplace=True)
                 GDP_Index_Smoothed_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 GDP_Index_Smoothed_df_M = data[0]['LORSGPRT']['STSA']
-                GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed',inplace=True)
+                GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed', inplace=True)
                 GDP_Index_Smoothed_df_Q = pd.DataFrame()
 
             # For Harmonised Unemployment Index
             # For countries with no data
-            if country in ['IND','CHE','ZAF','CHN']:
+            if country in ['IND', 'CHE', 'ZAF', 'CHN']:
                 Harmonised_Unemployment_Index_df_M = pd.DataFrame()
                 Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
             # For countries with quarterly data
             elif country in ['NZL']:
                 Harmonised_Unemployment_Index_df_Q = data[1]['LRHUTTTT']['STSA']
-                Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index',inplace=True)
+                Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index', inplace=True)
                 Harmonised_Unemployment_Index_df_M = pd.DataFrame()
             # For countries with monthly data
-            else:
+            else:
                 Harmonised_Unemployment_Index_df_M = data[0]['LRHUTTTT']['STSA']
-                Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index',inplace=True)
+                Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index', inplace=True)
                 Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
 
-            # For hourly earnings index manufacturing
+            # For hourly earnings index manufacturing
             # For countries with no data
-            if country in ['IND','CHE','ZAF','CHN']:
+            if country in ['IND', 'CHE', 'ZAF', 'CHN']:
                 Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
                 Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['FRA','DEU','ESP','AUS','NZL','KOR','NOR']:
+            elif country in ['FRA', 'DEU', 'ESP', 'AUS', 'NZL', 'KOR', 'NOR']:
                 Hourly_Earnings_Index_Manufacturing_df_Q = data[1]['LCEAMN01']['IXOBSA']
-                Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing',inplace=True)
+                Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing', inplace=True)
                 Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Hourly_Earnings_Index_Manufacturing_df_M = data[0]['LCEAMN01']['IXOBSA']
-                Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing',inplace=True)
+                Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing', inplace=True)
                 Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
 
-            # For hourly earnings index private
-            # Hourly_Earnings_Index_Private_df = data['GBR']['LCEAPR']['IXOBSA']
-            # Hourly_Earnings_Index_Private_df.rename('Hourly Earnings Index Private',inplace=True)
-
             # For Short Term Interest Rate
             # For countries with no data
             if country in []:
                 Short_Term_Interest_Rate_df_M = pd.DataFrame()
                 Short_Term_Interest_Rate_df_Q = pd.DataFrame()
-            # For countries with quarterly data
+            # For countries with quarterly data
             elif country in []:
                 Short_Term_Interest_Rate_df_Q = data[1]['IR3TIB01']['ST']
-                Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate',inplace=True)
+                Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate', inplace=True)
                 Short_Term_Interest_Rate_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Short_Term_Interest_Rate_df_M = data[0]['IR3TIB01']['ST']
-                Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate',inplace=True)
+                Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate', inplace=True)
                 Short_Term_Interest_Rate_df_Q = pd.DataFrame()
 
             # For Industrial Product Growth on Previous Period
             # For countries with no data
-            if country in ['ZAF','CHN']:
+            if country in ['ZAF', 'CHN']:
                 Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
                 Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Industrial_Product_Growth_on_Previous_Period_df_Q = data[1]['PRINTO01']['GPSA']
-                Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period',inplace=True)
+                Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period', inplace=True)
                 Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Industrial_Product_Growth_on_Previous_Period_df_M = data[0]['PRINTO01']['GPSA']
-                Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period',inplace=True)
+                Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period', inplace=True)
                 Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
 
             # For Industrial Production Index
             # For countries with no data
-            if country in ['ZAF','CHN']:
+            if country in ['ZAF', 'CHN']:
                 Industrial_Production_Index_df_M = pd.DataFrame()
                 Industrial_Production_Index_df_Q = pd.DataFrame()
             # For countries with quarterly data
-            elif country in ['AUS','NZL']:
+            elif country in ['AUS', 'NZL']:
                 Industrial_Production_Index_df_Q = data[1]['PRINTO01']['IXOBSA']
-                Industrial_Production_Index_df_Q.rename('industrial_production_index',inplace=True)
+                Industrial_Production_Index_df_Q.rename('industrial_production_index', inplace=True)
                 Industrial_Production_Index_df_M = pd.DataFrame()
             # For countries with monthly data
             else:
                 Industrial_Production_Index_df_M = data[0]['PRINTO01']['IXOBSA']
-                Industrial_Production_Index_df_M.rename('industrial_production_index',inplace=True)
+                Industrial_Production_Index_df_M.rename('industrial_production_index', inplace=True)
                 Industrial_Production_Index_df_Q = pd.DataFrame()
 
-            #
-            # USD_GBP_Exchange_Rate_df = data['GBR']['PRINTO01']['IXOBSA']
-            # USD_GBP_Exchange_Rate_df.rename('Industrial Production Index',inplace=True)
-
-
-
-
-
-            # Create monthly macroeconomic dataframe
+            # Create monthly macroeconomic dataframe
             all_dfs_list_M = [Consumer_Confidence_Index_df_M,
                               Consumer_Price_Index_Cost_Of_Living_df_M,
                               Consumer_Price_Index_Inflation_df_M,
@@ -1728,11 +1743,11 @@ class datapull:
                               Industrial_Product_Growth_on_Previous_Period_df_M,
                               Industrial_Production_Index_df_M]
 
-            # Check if any dataframes are empty and if there are remove them
-            all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
-            cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M,axis=1)
+            # Check if any dataframes are empty and if there are remove them
+            all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
+            cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M, axis=1)
 
-            # Create quarterly macroeconomic dataframe
+            # Create quarterly macroeconomic dataframe
             all_dfs_list_Q = [Consumer_Confidence_Index_df_Q,
                               Consumer_Price_Index_Cost_Of_Living_df_Q,
                               Consumer_Price_Index_Inflation_df_Q,
@@ -1743,38 +1758,38 @@ class datapull:
                               Industrial_Product_Growth_on_Previous_Period_df_Q,
                               Industrial_Production_Index_df_Q]
 
-            # Check if any dataframes are empty and if there are remove them
-            all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
+            # Check if any dataframes are empty and if there are remove them
+            all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
             if all_dfs_list_Q != []:
-                macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q,axis=1)
-            else:
-                macroeconomic_monthly_df_Q
+                macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q, axis=1)
+            else:
+                macroeconomic_monthly_df_Q = pd.DataFrame()
 
             # For USD GBP Exchange Rate
             # If it's the UK add this series else don't
             if countries_list[index] == 'GBR':
-                USD_GBP_Exchange_Rate_df = pd.read_csv(
+                USD_GBP_Exchange_Rate_df = pd.read_csv(
+                    'https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/CCUS.' + countries_list[index] + '.M/OECD?contentType=csv')
                 USD_GBP_Exchange_Rate_df.head()
-                USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df,values='Value',index='TIME',
+                USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df, values='Value', index='TIME',
+                                                                columns='Subject')
                 USD_GBP_Exchange_Rate_df_pivot_final = USD_GBP_Exchange_Rate_df_pivot.loc["2015-01":]
-                USD_GBP_Exchange_Rate_df_pivot_final.rename(
+                USD_GBP_Exchange_Rate_df_pivot_final.rename(
+                    columns={'Currency exchange rates, monthly average': 'usd_gbp_exchange_rate'}, inplace=True)
 
                 # Create final monthly dataframe
-                macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M,USD_GBP_Exchange_Rate_df_pivot_final],axis=1)
+                macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M, USD_GBP_Exchange_Rate_df_pivot_final], axis=1)
             else:
                 # Create final monthly dataframe
                 macroeconomic_monthly_df_M = cif_Macroeconomic_df_M
 
-
-
-
-            # Create the final W/C Sunday dataframe
+            # Create the final W/C Sunday dataframe
             # For monthly data
-            macroeconomic_monthly_df_M['Date']=macroeconomic_monthly_df_M.index
+            macroeconomic_monthly_df_M['Date'] = macroeconomic_monthly_df_M.index
             df_M = macroeconomic_monthly_df_M.set_index(pd.to_datetime(macroeconomic_monthly_df_M['Date'])).drop(columns='Date')
-            df_M.fillna(method="ffill",inplace=True)
+            df_M.fillna(method="ffill", inplace=True)
             df_M.reset_index(inplace=True)
-
+
             daily_records = []
             # Iterate over each row in the DataFrame
             for _, row in df_M.iterrows():
@@ -1788,37 +1803,32 @@ class datapull:
 
             # Convert the list of daily records into a DataFrame
             daily_df = pd.DataFrame(daily_records)
-
-            # Extend dataframe to include the current data if needed
-            datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
-            extended_data = np.repeat([list(daily_df.iloc[-1,1:].values)],len(datelist),axis=0)
-            q = pd.Series(datelist,name="Date")
-            s = pd.DataFrame(extended_data,columns=list(df_M.columns[1:]))
-            extended_daily_df = pd.concat([q,s],axis=1)
-            extended_daily_df = daily_df.append(extended_daily_df, ignore_index=False)
-
-            # Create a week commencing column
-            extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
-            extended_daily_df['week_start'] = extended_daily_df["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-            extended_daily_df.drop("Date",axis=1,inplace=True)
-            extended_daily_df.rename(columns={'week_start':"Date"},inplace=True)
-
-            # Take a weekly average
-            macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
-
-
-
-
 
+            # Extend dataframe to include the current data if needed
+            datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+            extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+            q = pd.Series(datelist, name="Date")
+            s = pd.DataFrame(extended_data, columns=list(df_M.columns[1:]))
+            extended_daily_df = pd.concat([q, s], axis=1)
+            extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
 
+            # Create a week commencing column
+            extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+            extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+                lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+            extended_daily_df.drop("Date", axis=1, inplace=True)
+            extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
 
+            # Take a weekly average
+            macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
 
             # For quarterly data
             # If there are quarterly datasets
             if all_dfs_list_Q != []:
-                macroeconomic_monthly_df_Q['Date']=macroeconomic_monthly_df_Q.index
-                df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
-
+                macroeconomic_monthly_df_Q['Date'] = macroeconomic_monthly_df_Q.index
+                df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
+                    columns='Date')
+                df_Q.fillna(method="ffill", inplace=True)
                 df_Q.reset_index(inplace=True)
 
                 daily_records = []
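Note: `daily_df.append(extended_daily_df, ignore_index=False)` becomes `pd.concat([daily_df, extended_daily_df], ignore_index=False)`. DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the old line raises AttributeError on current pandas. A minimal before/after sketch with illustrative data:

    import pandas as pd
    daily_df = pd.DataFrame({"Date": pd.date_range("2024-01-01", periods=3), "v": [1.0, 2.0, 3.0]})
    extension = pd.DataFrame({"Date": pd.date_range("2024-01-04", periods=2), "v": [3.0, 3.0]})
    # old: daily_df.append(extension, ignore_index=False)   # AttributeError on pandas >= 2.0
    combined = pd.concat([daily_df, extension], ignore_index=False)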
@@ -1826,45 +1836,47 @@ class datapull:
                     year = row["Date"].year
                     month = row["Date"].month
                     day = row["Date"].day
-                    last_date = get_last_day_of_the_quarter(datetime(year,month,day).date())
-                    all_days = pd.date_range(row["Date"],last_date,freq="D")
+                    last_date = get_last_day_of_the_quarter(datetime(year, month, day).date())
+                    all_days = pd.date_range(row["Date"], last_date, freq="D")
 
                     # Create a new record for each day of the quarter
                     for day in all_days:
                         daily_row = row.copy()
-                        daily_row["Date"] = row["Date"].replace(day=day.day,month=day.month)
+                        daily_row["Date"] = row["Date"].replace(day=day.day, month=day.month)
                         daily_records.append(daily_row)
 
                 # Convert the list of daily records into a DataFrame
                 daily_df = pd.DataFrame(daily_records)
-
+
                 # Extend dataframe to include data up to today
-                datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
-                extended_data =
-                q = pd.Series(datelist,name="Date")
-                s = pd.DataFrame(extended_data,columns=list(df_Q.columns[1:]))
-                extended_daily_df = pd.concat([q,s],axis=1)
-                extended_daily_df =
-
+                datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+                extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+                q = pd.Series(datelist, name="Date")
+                s = pd.DataFrame(extended_data, columns=list(df_Q.columns[1:]))
+                extended_daily_df = pd.concat([q, s], axis=1)
+                extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
+
                 # Create a week commencing column
-                extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
-                extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
-
-                extended_daily_df.
-
+                extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+                extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+                    lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+                extended_daily_df.drop("Date", axis=1, inplace=True)
+                extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
+
                 # Take a weekly average
                 macroeconomic_weekly_df_Q = extended_daily_df.groupby('Date').mean()
 
             # Merge the two datasets together
             if all_dfs_list_Q != []:
-                macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q,left_index=True,
+                macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q, left_index=True,
+                                                                          right_index=True)
             # If there are no quarterly datasets
             else:
                 macroeconomic_weekly_df = macroeconomic_weekly_df_M
-
-            # Change
-            macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
-
+
+            # Change datetime format
+            macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
+
             macroeconomic_weekly_df.reset_index()
             macroeconomic_weekly_df.reset_index(drop=False, inplace=True)
             macroeconomic_weekly_df.rename(columns={'Date': 'OBS'}, inplace=True)
@@ -2050,12 +2062,11 @@ class datapull:
         country = country_dict[country]
 
         # Choose start and end dates
-        import datetime
         start_day = 1
         start_month = 1
         start_year = 2014
-        formatted_date = datetime
-        today = datetime.
+        formatted_date = datetime(start_year, start_month, start_day).strftime("%Y-%m-%d")
+        today = datetime.now()
         end_day = today.day
         end_month = today.month
         end_year = today.year
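Note: the removed function-local `import datetime` rebound the name to the module, so calls such as `datetime(year, month, day)` used elsewhere in this file would fail inside this method ('module' object is not callable). A standalone sketch of the rewritten date setup:

    from datetime import datetime
    formatted_date = datetime(2014, 1, 1).strftime("%Y-%m-%d")  # '2014-01-01'
    today = datetime.now()
    end_day, end_month, end_year = today.day, today.month, today.year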
@@ -2150,7 +2161,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
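Note: this fix, repeated in the weather and rainfall pulls below, restricts the weekly aggregation to numeric columns before averaging; on pandas 2.0+ a bare groupby().mean() raises TypeError when string columns are present rather than silently dropping them. A standalone sketch with illustrative data:

    import pandas as pd
    weather = pd.DataFrame({
        "week_starting": ["2024-01-01", "2024-01-01"],
        "station": ["LHR", "LGW"],   # non-numeric column that would break a bare .mean()
        "max_temp_f": [48.0, 50.0],
    })
    numeric_columns = weather.select_dtypes(include='number').columns
    weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
    print(weekly_avg_temp)           # max_temp_f averaged per week; 'station' excluded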
@@ -2209,7 +2221,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
@@ -2260,7 +2273,8 @@ class datapull:
         weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = weather.select_dtypes(include='number').columns
+        weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
                                         "min_temp_f": "avg_min_temp_f",
                                         "mean_temp_f": "avg_mean_temp_f",
@@ -2282,7 +2296,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2318,7 +2331,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2337,7 +2351,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2373,7 +2386,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2392,7 +2406,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2428,7 +2441,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
@@ -2447,7 +2461,6 @@ class datapull:
 
         # Loop through each city and fetch weather data
         for city in cities:
-
             # Initialize Nominatim API
             geolocator = Nominatim(user_agent="MyApp")
             location = geolocator.geocode(city)
@@ -2483,7 +2496,8 @@ class datapull:
         all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
 
         # Group by week_starting and summarize
-
+        numeric_columns = all_weather_data.select_dtypes(include='number').columns
+        weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
         weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)
 
         # Change index to datetime
{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/RECORD
CHANGED
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
-imsciences/datafunctions.py,sha256=
+imsciences/datafunctions.py,sha256=vrv-6H8iccN23bdn5OqBHLsWfscrKOWvVyAtrlkgyd4,132385
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.0.
-imsciences-0.6.0.
-imsciences-0.6.0.
-imsciences-0.6.0.
+imsciences-0.6.0.2.dist-info/METADATA,sha256=jB4rv8_8NBrTNNUi1WWT7-WZ5R2u11IxJIieDQJ5hm8,11571
+imsciences-0.6.0.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.0.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.0.2.dist-info/RECORD,,

{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/WHEEL
File without changes

{imsciences-0.6.0.1.dist-info → imsciences-0.6.0.2.dist-info}/top_level.txt
File without changes