imsciences 0.6.0.0__tar.gz → 0.6.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: imsciences
- Version: 0.6.0.0
+ Version: 0.6.0.2
  Summary: IMS Data Processing Package
  Author: IMS
  Author-email: cam@im-sciences.com
@@ -1,6 +1,5 @@
  import pandas as pd
  import calendar
- import requests
  import os
  import plotly.express as px
  import plotly.graph_objs as go
@@ -18,7 +17,8 @@ import requests_cache
  import urllib.request
  import requests
  from geopy.geocoders import Nominatim
-
+ import subprocess
+ import json

  class dataprocessing:

@@ -180,7 +180,11 @@ class dataprocessing:
  print(" - Description: Remove zero values in a specified column.")
  print(" - Usage: remove_zero_values(self, data_frame, column_to_filter)")
  print(" - Example: remove_zero_values(None, df, 'Funeral_Delivery')")
-
+
+ print("\n32. upgrade all packages")
+ print(" - Description: Upgrades all packages.")
+ print(" - Usage: upgrade_outdated_packages()")
+ print(" - Example: upgrade_outdated_packages()")

  def get_wd_levels(self, levels):
  """
@@ -1194,6 +1198,38 @@ class dataprocessing:

  return data_frame.loc[~(data_frame[column_to_filter] ==0)]

+ def upgrade_outdated_packages(self):
+ try:
+ # Get all installed packages
+ installed_packages_result = subprocess.run("pip list --format=json", shell=True, capture_output=True, text=True)
+ installed_packages = json.loads(installed_packages_result.stdout)
+
+ # Get the list of outdated packages
+ outdated_packages_result = subprocess.run("pip list --outdated --format=json", shell=True, capture_output=True, text=True)
+ outdated_packages = json.loads(outdated_packages_result.stdout)
+
+ # Create a set of outdated package names for quick lookup
+ outdated_package_names = {pkg['name'] for pkg in outdated_packages}
+
+ # Upgrade only outdated packages
+ for package in installed_packages:
+ package_name = package['name']
+ if package_name in outdated_package_names:
+ try:
+ print(f"Upgrading package: {package_name}")
+ upgrade_result = subprocess.run(f"pip install --upgrade {package_name}", shell=True, capture_output=True, text=True)
+ if upgrade_result.returncode == 0:
+ print(f"Successfully upgraded {package_name}")
+ else:
+ print(f"Failed to upgrade {package_name}: {upgrade_result.stderr}")
+ except Exception as e:
+ print(f"An error occurred while upgrading {package_name}: {e}")
+ else:
+ print(f"{package_name} is already up to date")
+ except Exception as e:
+ print(f"An error occurred during the upgrade process: {e}")
+
+



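A note on the new upgrade_outdated_packages method added above: it shells out to whatever pip is first on PATH with shell=True, which can upgrade packages in the wrong environment, and it fetches the full installed list even though pip list --outdated already returns only the packages that need upgrading. A minimal sketch of a tighter variant (an assumption for illustration, not code shipped in this release) pins the calls to the running interpreter and drops the redundant first query:

    import json
    import subprocess
    import sys

    def upgrade_outdated_packages():
        # Query the *current* interpreter's pip; --outdated already filters,
        # so a separate "pip list" pass is unnecessary.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "list", "--outdated", "--format=json"],
            capture_output=True, text=True, check=True,
        )
        for pkg in json.loads(result.stdout):
            name = pkg["name"]
            print(f"Upgrading {name} {pkg['version']} -> {pkg['latest_version']}")
            upgrade = subprocess.run(
                [sys.executable, "-m", "pip", "install", "--upgrade", name],
                capture_output=True, text=True,
            )
            if upgrade.returncode == 0:
                print(f"Successfully upgraded {name}")
            else:
                print(f"Failed to upgrade {name}: {upgrade.stderr}")

Passing the command as an argument list avoids shell=True entirely, and sys.executable guarantees the upgrades land in the environment the library itself is running in.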
@@ -1483,14 +1519,13 @@ class datapull:

  return ons_df_final

- def pull_macro(self, country : str = "GBR", week_commencing : str = "mon"):
-
+ def pull_macro(self, country: str = "GBR", week_commencing: str = "mon"):
  # Change country input to list
  countries_list = [country]
-
+
  # Check if the data wants to be inputted at any other week commencing date
- day_dict = {"mon" : 0, "tue" : 1, "wed" : 2, "thur" : 3, "fri" : 4, "sat" : 5, "sun" : 6}
-
+ day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+
  # Two useful functions for quarterly data
  # Define a function to get quarterly data
  def get_quarter(p_date: datetime.date) -> int:
@@ -1499,40 +1534,33 @@ class datapull:
  # Define a function to get the last day of the quarter
  def get_last_day_of_the_quarter(p_date: datetime.date):
  quarter = get_quarter(p_date)
- return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + datetime.timedelta(days=-1)
-
+ return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)
+
  # For the monthly data
- data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries = countries_list, dsname = 'MEI',subject = ['LCEAMN01',
- 'LCEAPR',
- 'CSCICP03',
- 'CPALTT01',
- 'LRHUTTTT',
- 'LORSGPRT',
- 'IR3TIB01',
- 'PRINTO01'],
- measure = ['IXOBSA','IXNSA','IXNB','STSA','ST','GPSA','GY'],
- frequency = 'M', startDate = '2015-01')
- data_M = data_M.stack(level=[0,-1,-2]).reset_index()
-
- data_Q, subjects_Q, measures_Q = cif.createDataFrameFromOECD(countries = countries_list, dsname = 'MEI',subject = ['LCEAMN01',
- 'LCEAPR',
- 'CSCICP03',
- 'CPALTT01',
- 'LRHUTTTT',
- 'LORSGPRT',
- 'IR3TIB01',
- 'PRINTO01'],
- measure = ['IXOBSA','IXNSA','IXNB','STSA','ST','GPSA','GY'], frequency = 'Q',startDate = '2015-01')
-
- data_Q=data_Q.stack(level=[0,-1,-2]).reset_index()
-
- # create a data frame dictionary to store your monthly data frames
- DataFrameDict_M = {elem : pd.DataFrame() for elem in countries_list}
+ data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+ subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+ 'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+ 'PRINTO01'],
+ measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+ frequency='M', startDate='2015-01')
+ data_M = data_M.stack(level=[0, -1, -2]).reset_index()
+
+ data_Q, subjects_Q, measures_Q = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
+ subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
+ 'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
+ 'PRINTO01'],
+ measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
+ frequency='Q', startDate='2015-01')
+
+ data_Q = data_Q.stack(level=[0, -1, -2]).reset_index()
+
+ # Create a data frame dictionary to store your monthly data frames
+ DataFrameDict_M = {elem: pd.DataFrame() for elem in countries_list}
  for key in DataFrameDict_M.keys():
  DataFrameDict_M[key] = data_M[:][data_M.country == key]

- # create a data frame dictionary to store your quarterly data frames
- DataFrameDict_Q = {elem : pd.DataFrame() for elem in countries_list}
+ # Create a data frame dictionary to store your quarterly data frames
+ DataFrameDict_Q = {elem: pd.DataFrame() for elem in countries_list}
  for key in DataFrameDict_Q.keys():
  DataFrameDict_Q[key] = data_Q[:][data_Q.country == key]

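The get_last_day_of_the_quarter change above is a real bug fix, not a style edit: the module appears to import the class directly (the bare datetime(...) constructor calls imply from datetime import datetime), so datetime.timedelta(days=-1) would raise AttributeError, because timedelta lives on the datetime module, not the class. pd.Timedelta adds cleanly to a datetime. A quick check of the formula, assuming that import style and the usual (month - 1) // 3 + 1 body for get_quarter (its body sits outside this hunk):

    from datetime import datetime

    import pandas as pd

    def get_quarter(p_date):
        return (p_date.month - 1) // 3 + 1

    def get_last_day_of_the_quarter(p_date):
        quarter = get_quarter(p_date)
        # First day of the following quarter, minus one day.
        return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)

    print(get_last_day_of_the_quarter(datetime(2023, 11, 5).date()))  # 2023-12-31 00:00:00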
@@ -1540,41 +1568,40 @@ class datapull:
  countries_df_list_M = []
  for i in countries_list:
  df = pd.DataFrame(DataFrameDict_M[i])
- df.rename(columns={0:'Values'},inplace=True)
- df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+ df.rename(columns={0: 'Values'}, inplace=True)
+ df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
  countries_df_list_M.append(df)

  # Create a quarterly list of the dataframes to iterate through
  countries_df_list_Q = []
  for i in countries_list:
  df = pd.DataFrame(DataFrameDict_Q[i])
- df.rename(columns={0:'Values'},inplace=True)
- df = pd.pivot_table(data=df,index='time',values='Values',columns=['subject','measure'])
+ df.rename(columns={0: 'Values'}, inplace=True)
+ df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
  countries_df_list_Q.append(df)

- combined_countries_df_list = list(zip(countries_df_list_M,countries_df_list_Q))
+ combined_countries_df_list = list(zip(countries_df_list_M, countries_df_list_Q))

  # Loop through and create dataframes for every country
  for index, data in enumerate(combined_countries_df_list):
-
  # Find country being extracted
- country = countries_list[index]
+ country = countries_list[index]
  print(country)

  # For consumer confidence
  # For countries with no data
- if country in ['CAN','IND','NOR']:
+ if country in ['CAN', 'IND', 'NOR']:
  Consumer_Confidence_Index_df_M = pd.DataFrame()
  Consumer_Confidence_Index_df_Q = pd.DataFrame()
- # For countries with quarterly data
+ # For countries with quarterly data
  elif country in []:
  Consumer_Confidence_Index_df_Q = data[1]['CSCICP03']['IXNSA']
- Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index',inplace=True)
+ Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index', inplace=True)
  Consumer_Confidence_Index_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Consumer_Confidence_Index_df_M = data[0]['CSCICP03']['IXNSA']
- Consumer_Confidence_Index_df_M.rename('consumer_confidence_index',inplace=True)
+ Consumer_Confidence_Index_df_M.rename('consumer_confidence_index', inplace=True)
  Consumer_Confidence_Index_df_Q = pd.DataFrame()

  # For consumer prices for COST OF LIVING
@@ -1583,14 +1610,14 @@ class datapull:
  Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
  Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
  # For countries with quarterly data
- elif country in ['AUS','NZL']:
+ elif country in ['AUS', 'NZL']:
  Consumer_Price_Index_Cost_Of_Living_df_Q = data[1]['CPALTT01']['IXNB']
- Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living',inplace=True)
+ Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living', inplace=True)
  Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Consumer_Price_Index_Cost_Of_Living_df_M = data[0]['CPALTT01']['IXNB']
- Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living',inplace=True)
+ Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living', inplace=True)
  Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()

  # For consumer prices FOR INFLATION
@@ -1599,125 +1626,113 @@ class datapull:
  Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
  Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
  # For countries with quarterly data
- elif country in ['AUS','NZL']:
+ elif country in ['AUS', 'NZL']:
  Consumer_Price_Index_Inflation_df_Q = data[1]['CPALTT01']['GY']
- Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation',inplace=True)
+ Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation', inplace=True)
  Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Consumer_Price_Index_Inflation_df_M = data[0]['CPALTT01']['GY']
- Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation',inplace=True)
+ Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation', inplace=True)
  Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()

- # For GDP Index Smoothed
+ # For GDP Index Smoothed
  # For countries with no data
- if country in ['NLD','CHE','NZL','SWE','NOR']:
+ if country in ['NLD', 'CHE', 'NZL', 'SWE', 'NOR']:
  GDP_Index_Smoothed_df_M = pd.DataFrame()
  GDP_Index_Smoothed_df_Q = pd.DataFrame()
  # For countries with quarterly data
  elif country in []:
  GDP_Index_Smoothed_df_Q = data[1]['LORSGPRT']['STSA']
- GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed',inplace=True)
+ GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed', inplace=True)
  GDP_Index_Smoothed_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  GDP_Index_Smoothed_df_M = data[0]['LORSGPRT']['STSA']
- GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed',inplace=True)
+ GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed', inplace=True)
  GDP_Index_Smoothed_df_Q = pd.DataFrame()

  # For Harmonised Unemployment Index
  # For countries with no data
- if country in ['IND','CHE','ZAF','CHN']:
+ if country in ['IND', 'CHE', 'ZAF', 'CHN']:
  Harmonised_Unemployment_Index_df_M = pd.DataFrame()
  Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
  # For countries with quarterly data
  elif country in ['NZL']:
  Harmonised_Unemployment_Index_df_Q = data[1]['LRHUTTTT']['STSA']
- Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index',inplace=True)
+ Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index', inplace=True)
  Harmonised_Unemployment_Index_df_M = pd.DataFrame()
  # For countries with monthly data
- else:
+ else:
  Harmonised_Unemployment_Index_df_M = data[0]['LRHUTTTT']['STSA']
- Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index',inplace=True)
+ Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index', inplace=True)
  Harmonised_Unemployment_Index_df_Q = pd.DataFrame()

- # For hourly earnings index manufacturing
+ # For hourly earnings index manufacturing
  # For countries with no data
- if country in ['IND','CHE','ZAF','CHN']:
+ if country in ['IND', 'CHE', 'ZAF', 'CHN']:
  Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
  Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
  # For countries with quarterly data
- elif country in ['FRA','DEU','ESP','AUS','NZL','KOR','NOR']:
+ elif country in ['FRA', 'DEU', 'ESP', 'AUS', 'NZL', 'KOR', 'NOR']:
  Hourly_Earnings_Index_Manufacturing_df_Q = data[1]['LCEAMN01']['IXOBSA']
- Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing',inplace=True)
+ Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing', inplace=True)
  Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Hourly_Earnings_Index_Manufacturing_df_M = data[0]['LCEAMN01']['IXOBSA']
- Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing',inplace=True)
+ Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing', inplace=True)
  Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()

- # For hourly earnings index private
- # Hourly_Earnings_Index_Private_df = data['GBR']['LCEAPR']['IXOBSA']
- # Hourly_Earnings_Index_Private_df.rename('Hourly Earnings Index Private',inplace=True)
-
  # For Short Term Interest Rate
  # For countries with no data
  if country in []:
  Short_Term_Interest_Rate_df_M = pd.DataFrame()
  Short_Term_Interest_Rate_df_Q = pd.DataFrame()
- # For countries with quarterly data
+ # For countries with quarterly data
  elif country in []:
  Short_Term_Interest_Rate_df_Q = data[1]['IR3TIB01']['ST']
- Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate',inplace=True)
+ Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate', inplace=True)
  Short_Term_Interest_Rate_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Short_Term_Interest_Rate_df_M = data[0]['IR3TIB01']['ST']
- Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate',inplace=True)
+ Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate', inplace=True)
  Short_Term_Interest_Rate_df_Q = pd.DataFrame()

  # For Industrial Product Growth on Previous Period
  # For countries with no data
- if country in ['ZAF','CHN']:
+ if country in ['ZAF', 'CHN']:
  Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
  Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
  # For countries with quarterly data
- elif country in ['AUS','NZL']:
+ elif country in ['AUS', 'NZL']:
  Industrial_Product_Growth_on_Previous_Period_df_Q = data[1]['PRINTO01']['GPSA']
- Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period',inplace=True)
+ Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period', inplace=True)
  Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Industrial_Product_Growth_on_Previous_Period_df_M = data[0]['PRINTO01']['GPSA']
- Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period',inplace=True)
+ Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period', inplace=True)
  Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()

  # For Industrial Production Index
  # For countries with no data
- if country in ['ZAF','CHN']:
+ if country in ['ZAF', 'CHN']:
  Industrial_Production_Index_df_M = pd.DataFrame()
  Industrial_Production_Index_df_Q = pd.DataFrame()
  # For countries with quarterly data
- elif country in ['AUS','NZL']:
+ elif country in ['AUS', 'NZL']:
  Industrial_Production_Index_df_Q = data[1]['PRINTO01']['IXOBSA']
- Industrial_Production_Index_df_Q.rename('industrial_production_index',inplace=True)
+ Industrial_Production_Index_df_Q.rename('industrial_production_index', inplace=True)
  Industrial_Production_Index_df_M = pd.DataFrame()
  # For countries with monthly data
  else:
  Industrial_Production_Index_df_M = data[0]['PRINTO01']['IXOBSA']
- Industrial_Production_Index_df_M.rename('industrial_production_index',inplace=True)
+ Industrial_Production_Index_df_M.rename('industrial_production_index', inplace=True)
  Industrial_Production_Index_df_Q = pd.DataFrame()

- # For USD GBP Exchange Rate
- # USD_GBP_Exchange_Rate_df = data['GBR']['PRINTO01']['IXOBSA']
- # USD_GBP_Exchange_Rate_df.rename('Industrial Production Index',inplace=True)
-
-
-
-
-
-
- # Create monthly macroeconomic dataframe
+ # Create monthly macroeconomic dataframe
  all_dfs_list_M = [Consumer_Confidence_Index_df_M,
  Consumer_Price_Index_Cost_Of_Living_df_M,
  Consumer_Price_Index_Inflation_df_M,
@@ -1728,11 +1743,11 @@ class datapull:
  Industrial_Product_Growth_on_Previous_Period_df_M,
  Industrial_Production_Index_df_M]

- # Check if any dataframes are empty and if there are remove them
- all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
- cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M,axis=1)
+ # Check if any dataframes are empty and if there are remove them
+ all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
+ cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M, axis=1)

- # Create quarterly macroeconomic dataframe
+ # Create quarterly macroeconomic dataframe
  all_dfs_list_Q = [Consumer_Confidence_Index_df_Q,
  Consumer_Price_Index_Cost_Of_Living_df_Q,
  Consumer_Price_Index_Inflation_df_Q,
@@ -1743,38 +1758,38 @@ class datapull:
  Industrial_Product_Growth_on_Previous_Period_df_Q,
  Industrial_Production_Index_df_Q]

- # Check if any dataframes are empty and if there are remove them
- all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
+ # Check if any dataframes are empty and if there are remove them
+ all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
  if all_dfs_list_Q != []:
- macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q,axis=1)
- else:
- macroeconomic_monthly_df_Q = []
+ macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q, axis=1)
+ else:
+ macroeconomic_monthly_df_Q = pd.DataFrame()

  # For USD GBP Exchange Rate
  # If it's the UK add this series else don't
  if countries_list[index] == 'GBR':
- USD_GBP_Exchange_Rate_df = pd.read_csv('https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/CCUS.' + countries_list[index] + '.M/OECD?contentType=csv')
+ USD_GBP_Exchange_Rate_df = pd.read_csv(
+ 'https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/CCUS.' + countries_list[index] + '.M/OECD?contentType=csv')
  USD_GBP_Exchange_Rate_df.head()
- USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df,values='Value',index='TIME',columns='Subject')
+ USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df, values='Value', index='TIME',
+ columns='Subject')
  USD_GBP_Exchange_Rate_df_pivot_final = USD_GBP_Exchange_Rate_df_pivot.loc["2015-01":]
- USD_GBP_Exchange_Rate_df_pivot_final.rename(columns={'Currency exchange rates, monthly average':'usd_gbp_exchange_rate'},inplace=True)
+ USD_GBP_Exchange_Rate_df_pivot_final.rename(
+ columns={'Currency exchange rates, monthly average': 'usd_gbp_exchange_rate'}, inplace=True)

  # Create final monthly dataframe
- macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M,USD_GBP_Exchange_Rate_df_pivot_final],axis=1)
+ macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M, USD_GBP_Exchange_Rate_df_pivot_final], axis=1)
  else:
  # Create final monthly dataframe
  macroeconomic_monthly_df_M = cif_Macroeconomic_df_M

-
-
-
- # Create the final W/C Sunday dataframe
+ # Create the final W/C Sunday dataframe
  # For monthly data
- macroeconomic_monthly_df_M['Date']=macroeconomic_monthly_df_M.index
+ macroeconomic_monthly_df_M['Date'] = macroeconomic_monthly_df_M.index
  df_M = macroeconomic_monthly_df_M.set_index(pd.to_datetime(macroeconomic_monthly_df_M['Date'])).drop(columns='Date')
- df_M.fillna(method="ffill",inplace=True)
+ df_M.fillna(method="ffill", inplace=True)
  df_M.reset_index(inplace=True)
-
+
  daily_records = []
  # Iterate over each row in the DataFrame
  for _, row in df_M.iterrows():
@@ -1788,37 +1803,32 @@ class datapull:

  # Convert the list of daily records into a DataFrame
  daily_df = pd.DataFrame(daily_records)
-
- # Extend dataframe to include the current data if needed
- datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
- extended_data = np.repeat([list(daily_df.iloc[-1,1:].values)],len(datelist),axis=0)
- q = pd.Series(datelist,name="Date")
- s = pd.DataFrame(extended_data,columns=list(df_M.columns[1:]))
- extended_daily_df = pd.concat([q,s],axis=1)
- extended_daily_df = daily_df.append(extended_daily_df, ignore_index=False)
-
- # Create a week commencing column
- extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
- extended_daily_df['week_start'] = extended_daily_df["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
- extended_daily_df.drop("Date",axis=1,inplace=True)
- extended_daily_df.rename(columns={'week_start':"Date"},inplace=True)
-
- # Take a weekly average
- macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
-
-
-
-

+ # Extend dataframe to include the current data if needed
+ datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+ extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+ q = pd.Series(datelist, name="Date")
+ s = pd.DataFrame(extended_data, columns=list(df_M.columns[1:]))
+ extended_daily_df = pd.concat([q, s], axis=1)
+ extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)

+ # Create a week commencing column
+ extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+ extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+ extended_daily_df.drop("Date", axis=1, inplace=True)
+ extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)

+ # Take a weekly average
+ macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()

  # For quarterly data
  # If there are quarterly datasets
  if all_dfs_list_Q != []:
- macroeconomic_monthly_df_Q['Date']=macroeconomic_monthly_df_Q.index
- df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(columns='Date')
- df_Q.fillna(method="ffill",inplace=True)
+ macroeconomic_monthly_df_Q['Date'] = macroeconomic_monthly_df_Q.index
+ df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
+ columns='Date')
+ df_Q.fillna(method="ffill", inplace=True)
  df_Q.reset_index(inplace=True)

  daily_records = []
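One pattern these hunks keep unchanged is df_M.fillna(method="ffill", inplace=True) (and the df_Q equivalent below): the method= argument to fillna is deprecated as of pandas 2.1, so on current pandas these lines emit a FutureWarning. A sketch of the modern spelling, offered against the frame named in the hunk rather than as a change this release actually makes:

    # Deprecated spelling (warns on pandas >= 2.1):
    df_M.fillna(method="ffill", inplace=True)

    # Current spelling, same forward-fill behaviour:
    df_M.ffill(inplace=True)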
@@ -1826,45 +1836,47 @@ class datapull:
  year = row["Date"].year
  month = row["Date"].month
  day = row["Date"].day
- last_date = get_last_day_of_the_quarter(datetime(year,month,day).date())
- all_days = pd.date_range(row["Date"],last_date,freq="D")
+ last_date = get_last_day_of_the_quarter(datetime(year, month, day).date())
+ all_days = pd.date_range(row["Date"], last_date, freq="D")

  # Create a new record for each day of the quarter
  for day in all_days:
  daily_row = row.copy()
- daily_row["Date"] = row["Date"].replace(day=day.day,month=day.month)
+ daily_row["Date"] = row["Date"].replace(day=day.day, month=day.month)
  daily_records.append(daily_row)

  # Convert the list of daily records into a DataFrame
  daily_df = pd.DataFrame(daily_records)
-
+
  # Extend dataframe to include data up to today
- datelist = pd.date_range(daily_df["Date"].iloc[-1]+pd.Timedelta(days=1),datetime.today()).tolist()
- extended_data = np.repeat([list(daily_df.iloc[-1,1:].values)],len(datelist),axis=0)
- q = pd.Series(datelist,name="Date")
- s = pd.DataFrame(extended_data,columns=list(df_Q.columns[1:]))
- extended_daily_df = pd.concat([q,s],axis=1)
- extended_daily_df = daily_df.append(extended_daily_df, ignore_index=False)
-
+ datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
+ extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
+ q = pd.Series(datelist, name="Date")
+ s = pd.DataFrame(extended_data, columns=list(df_Q.columns[1:]))
+ extended_daily_df = pd.concat([q, s], axis=1)
+ extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
+
  # Create a week commencing column
- extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
- extended_daily_df['week_start'] = extended_daily_df["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
- extended_daily_df.drop("Date",axis=1,inplace=True)
- extended_daily_df.rename(columns={'week_start':"Date"},inplace=True)
-
+ extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
+ extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+ extended_daily_df.drop("Date", axis=1, inplace=True)
+ extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
+
  # Take a weekly average
  macroeconomic_weekly_df_Q = extended_daily_df.groupby('Date').mean()

  # Merge the two datasets together
  if all_dfs_list_Q != []:
- macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q,left_index=True, right_index=True)
+ macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q, left_index=True,
+ right_index=True)
  # If there are no quarterly datasets
  else:
  macroeconomic_weekly_df = macroeconomic_weekly_df_M
-
- # Change datime format
- macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
-
+
+ # Change datetime format
+ macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
+
  macroeconomic_weekly_df.reset_index()
  macroeconomic_weekly_df.reset_index(drop=False, inplace=True)
  macroeconomic_weekly_df.rename(columns={'Date': 'OBS'}, inplace=True)
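The daily_df.append(...) to pd.concat([daily_df, extended_daily_df], ...) swap in this hunk and the monthly one above is the compatibility change that matters most: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the old code crashes outright on a current install. The replacement preserves behaviour:

    import pandas as pd

    a = pd.DataFrame({"x": [1, 2]})
    b = pd.DataFrame({"x": [3]})

    # Pre-pandas-2.0 only:
    # combined = a.append(b, ignore_index=False)

    # Works on all maintained versions:
    combined = pd.concat([a, b], ignore_index=False)
    print(combined)

With ignore_index=False both spellings keep the original row labels (so the index 0 appears twice here), which is harmless for the later groupby('Date') aggregation.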
@@ -2050,12 +2062,11 @@ class datapull:
  country = country_dict[country]

  # Choose start and end dates
- import datetime
  start_day = 1
  start_month = 1
  start_year = 2014
- formatted_date = datetime.datetime(start_year, start_month, start_day).strftime("%Y-%m-%d")
- today = datetime.datetime.now()
+ formatted_date = datetime(start_year, start_month, start_day).strftime("%Y-%m-%d")
+ today = datetime.now()
  end_day = today.day
  end_month = today.month
  end_year = today.year
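Dropping the function-local import datetime here is more than tidying. The module already binds the datetime class at top level (inferred from the bare datetime(...) calls elsewhere in the file), and the local import rebound the name to the module inside this function, which is why the old lines needed the doubled datetime.datetime(...) spelling. With the local import gone, the call sites match the rest of the file:

    from datetime import datetime

    # The class is used directly, as in the rest of the module:
    formatted_date = datetime(2014, 1, 1).strftime("%Y-%m-%d")
    today = datetime.now()

The claim about the module-level import style is an inference from the surrounding code, not something this diff states explicitly.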
@@ -2150,7 +2161,8 @@ class datapull:
  weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_temp = weather.groupby("week_starting").mean()
+ numeric_columns = weather.select_dtypes(include='number').columns
+ weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
  "min_temp_f": "avg_min_temp_f",
  "mean_temp_f": "avg_mean_temp_f",
@@ -2209,7 +2221,8 @@ class datapull:
  weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_temp = weather.groupby("week_starting").mean()
+ numeric_columns = weather.select_dtypes(include='number').columns
+ weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
  "min_temp_f": "avg_min_temp_f",
  "mean_temp_f": "avg_mean_temp_f",
@@ -2260,7 +2273,8 @@ class datapull:
  weather['week_starting'] = weather["day"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_temp = weather.groupby("week_starting").mean()
+ numeric_columns = weather.select_dtypes(include='number').columns
+ weekly_avg_temp = weather.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_temp.rename(columns={"max_temp_f": "avg_max_temp_f",
  "min_temp_f": "avg_min_temp_f",
  "mean_temp_f": "avg_mean_temp_f",
@@ -2282,7 +2296,6 @@ class datapull:

  # Loop through each city and fetch weather data
  for city in cities:
-
  # Initialize Nominatim API
  geolocator = Nominatim(user_agent="MyApp")
  location = geolocator.geocode(city)
@@ -2318,7 +2331,8 @@ class datapull:
  all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_rain = all_weather_data.groupby("week_starting").mean()
+ numeric_columns = all_weather_data.select_dtypes(include='number').columns
+ weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)

  # Change index to datetime
@@ -2337,7 +2351,6 @@ class datapull:

  # Loop through each city and fetch weather data
  for city in cities:
-
  # Initialize Nominatim API
  geolocator = Nominatim(user_agent="MyApp")
  location = geolocator.geocode(city)
@@ -2373,7 +2386,8 @@ class datapull:
  all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_rain = all_weather_data.groupby("week_starting").mean()
+ numeric_columns = all_weather_data.select_dtypes(include='number').columns
+ weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)

  # Change index to datetime
@@ -2392,7 +2406,6 @@ class datapull:

  # Loop through each city and fetch weather data
  for city in cities:
-
  # Initialize Nominatim API
  geolocator = Nominatim(user_agent="MyApp")
  location = geolocator.geocode(city)
@@ -2428,7 +2441,8 @@ class datapull:
  all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_rain = all_weather_data.groupby("week_starting").mean()
+ numeric_columns = all_weather_data.select_dtypes(include='number').columns
+ weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)

  # Change index to datetime
@@ -2447,7 +2461,6 @@ class datapull:

  # Loop through each city and fetch weather data
  for city in cities:
-
  # Initialize Nominatim API
  geolocator = Nominatim(user_agent="MyApp")
  location = geolocator.geocode(city)
@@ -2483,7 +2496,8 @@ class datapull:
  all_weather_data['week_starting'] = all_weather_data["date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

  # Group by week_starting and summarize
- weekly_avg_rain = all_weather_data.groupby("week_starting").mean()
+ numeric_columns = all_weather_data.select_dtypes(include='number').columns
+ weekly_avg_rain = all_weather_data.groupby("week_starting")[numeric_columns].mean()
  weekly_avg_rain.rename(columns={"rainfall": "avg_rainfall"}, inplace=True)

  # Change index to datetime
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: imsciences
- Version: 0.6.0.0
+ Version: 0.6.0.2
  Summary: IMS Data Processing Package
  Author: IMS
  Author-email: cam@im-sciences.com
@@ -8,7 +8,7 @@ def read_md(file_name):
  return f.read()
  return ''

- VERSION = '0.6.0.0'
+ VERSION = '0.6.0.2'
  DESCRIPTION = 'IMS Data Processing Package'
  LONG_DESCRIPTION = read_md('README.md') # Reading from README.md
