imsciences 0.6.3.2__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/__init__.py +0 -1
- imsciences/datafunctions.py +384 -236
- {imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/METADATA +122 -72
- {imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/RECORD +7 -7
- {imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/PKG-INFO-IMS-24Ltp-3 +0 -0
- {imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/WHEEL +0 -0
- {imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/top_level.txt +0 -0
imsciences/__init__.py
CHANGED
imsciences/datafunctions.py
CHANGED
@@ -4,21 +4,18 @@ import os
 import plotly.express as px
 import plotly.graph_objs as go
 import numpy as np
-import datetime
 import re
 from fredapi import Fred
 import time
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta
 from io import StringIO
-import urllib
-import requests_cache  # noqa: F401
-import urllib.request  # noqa: F401
 import requests
-from geopy.geocoders import Nominatim  # noqa: F401
 import subprocess
 import json
 import xml.etree.ElementTree as ET
 from bs4 import BeautifulSoup
+import yfinance as yf
+import holidays

 class dataprocessing:

@@ -1767,17 +1764,6 @@ class dataprocessing:
 ########################################################################################################################################
 ########################################################################################################################################

-
-
-
-
-
-
-
-
-
-
-
 ims_proc = dataprocessing()

 class datapull:
@@ -1788,38 +1774,43 @@ class datapull:
 print("\n1. pull_fred_data")
 print(" - Description: Get data from FRED by using series id tokens.")
 print(" - Usage: pull_fred_data(week_commencing, series_id_list)")
-print(" - Example: pull_fred_data('mon', ['GPDIC1'
+print(" - Example: pull_fred_data('mon', ['GPDIC1'])")

 print("\n2. pull_boe_data")
 print(" - Description: Fetch and process Bank of England interest rate data.")
 print(" - Usage: pull_boe_data(week_commencing)")
 print(" - Example: pull_boe_data('mon')")

-print("\n3.
-print(" - Description: Fetch and process time series data from the ONS API.")
-print(" - Usage: pull_ons_data(series_list, week_commencing)")
-print(" - Example: pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')")
-
-print("\n4. pull_oecd")
+print("\n3. pull_oecd")
 print(" - Description: Fetch macroeconomic data from OECD for a specified country.")
-print(" - Usage: pull_oecd(country='GBR', week_commencing='mon', start_date: '
-print(" - Example: pull_oecd('GBR', 'mon', '
+print(" - Usage: pull_oecd(country='GBR', week_commencing='mon', start_date: '2020-01-01')")
+print(" - Example: pull_oecd('GBR', 'mon', '2000-01-01')")

-print("\
+print("\n4. get_google_mobility_data")
 print(" - Description: Fetch Google Mobility data for the specified country.")
 print(" - Usage: get_google_mobility_data(country, wc)")
 print(" - Example: get_google_mobility_data('United Kingdom', 'mon')")

-print("\
+print("\n5. pull_seasonality")
 print(" - Description: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.")
-print(" - Usage:
-print(" - Example:
+print(" - Usage: pull_seasonality(week_commencing, start_date, countries)")
+print(" - Example: pull_seasonality('mon', '2020-01-01', ['US', 'GB'])")

-print("\
+print("\n6. pull_weather")
 print(" - Description: Fetch and process historical weather data for the specified country.")
 print(" - Usage: pull_weather(week_commencing, country)")
 print(" - Example: pull_weather('mon', 'GBR')")
-
+
+print("\n7. pull_macro_ons_uk")
+print(" - Description: Fetch and process time series data from the Beta ONS API.")
+print(" - Usage: pull_macro_ons_uk(aditional_list, week_commencing, sector)")
+print(" - Example: pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')")
+
+print("\n8. pull_yfinance")
+print(" - Description: Fetch and process time series data from the Beta ONS API.")
+print(" - Usage: pull_yfinance(tickers, week_start_day)")
+print(" - Example: pull_yfinance(['^FTMC', '^IXIC'], 'mon')")
+
 ############################################################### MACRO ##########################################################################

 def pull_fred_data(self, week_commencing: str = 'mon', series_id_list: list[str] = ["GPDIC1", "Y057RX1Q020SBEA", "GCEC1"]) -> pd.DataFrame:
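The renumbered help text above reflects the reworked datapull surface in 0.8.1: pull_ons_data is gone, pull_oecd moves up to slot 3, and pull_macro_ons_uk and pull_yfinance are new. A minimal sketch of the documented calls, assuming the 0.8.1 wheel is installed and network/API access is available; the arguments are taken directly from the usage strings above:

```python
# Sketch only: exercises the datapull methods exactly as the help text above
# documents them. Requires network access; the FRED/OECD pulls can be slow.
from imsciences import *

ims_pull = datapull()

fred_df = ims_pull.pull_fred_data('mon', ['GPDIC1'])              # 1. FRED series
boe_df = ims_pull.pull_boe_data('mon')                            # 2. Bank of England rates
oecd_df = ims_pull.pull_oecd('GBR', 'mon', '2000-01-01')          # 3. OECD macro data
seas_df = ims_pull.pull_seasonality('mon', '2020-01-01', ['GB'])  # 5. seasonality dummies
```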
@@ -1837,10 +1828,6 @@ class datapull:
 ----------
 pd.DataFrame
 Return a data frame with FRED data according to the series IDs provided
-
-Example
-----------
-pull_fred_data("mon", ["GCEC1", "SP500"])
 '''
 # Fred API
 fred = Fred(api_key='76f5f8156145fdb8fbaf66f1eb944f8a')
@@ -1958,107 +1945,7 @@ class datapull:

 return df_final

-def
-"""
-Fetch and process time series data from the ONS API.
-
-Args:
-series_list (list): A list of dictionaries where each dictionary represents a time series.
-Each dictionary should have the keys 'series_id' and 'dataset_id'.
-week_commencing (str): The starting day of the week for aggregation.
-Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".
-
-Returns:
-pd.DataFrame: A DataFrame with weekly aggregated ONS data. The 'OBS' column contains the week
-commencing dates and other columns contain the aggregated time series values.
-"""
-
-def parse_quarter(date_str):
-"""Parses a string in 'YYYY Q#' format into a datetime object."""
-year, quarter = date_str.split(' ')
-quarter_number = int(quarter[1])
-month = (quarter_number - 1) * 3 + 1
-return pd.Timestamp(f"{year}-{month:02d}-01")
-
-# Generate a date range from 1950-01-01 to today
-date_range = pd.date_range(start="1950-01-01", end=datetime.today(), freq='D')
-daily_df = pd.DataFrame(date_range, columns=['OBS'])
-
-# Keep track of the renamed value columns
-value_columns = []
-
-for series in series_list:
-series_id = series['series_id']
-dataset_id = series['dataset_id']
-
-# Construct the URL for data
-data_url = f"https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data"
-
-# Make the request to the ONS API for data
-data_response = requests.get(data_url)
-
-# Check if the request was successful
-if data_response.status_code != 200:
-print(f"Failed to fetch data for series {series_id}: {data_response.status_code} {data_response.text}")
-continue
-
-# Parse the JSON response for data
-data = data_response.json()
-
-# Attempt to extract the name of the time series from the data response
-series_name = data.get('description', {}).get('title', 'Value')
-
-# Determine the most granular time series data available
-if 'months' in data and data['months']:
-time_series_data = data['months']
-elif 'quarters' in data and data['quarters']:
-time_series_data = data['quarters']
-elif 'years' in data and data['years']:
-time_series_data = data['years']
-else:
-print("No time series data found in the response")
-continue
-
-# Create a DataFrame from the time series data
-df = pd.DataFrame(time_series_data)
-
-# Handle different frequencies in the data
-if 'date' in df.columns:
-if any(df['date'].str.contains('Q')):
-df['date'] = df['date'].apply(parse_quarter)
-else:
-df['date'] = pd.to_datetime(df['date'])
-
-df = df.rename(columns={'date': 'OBS', 'value': series_name})
-
-# Rename the value column
-new_col_name = 'macro_' + series_name.lower().replace(':', '').replace(' ', '_').replace('-', '_')
-df = df.rename(columns={series_name: new_col_name})
-
-# Track the renamed value column
-value_columns.append(new_col_name)
-
-# Merge the data based on the observation date
-daily_df = pd.merge_asof(daily_df, df[['OBS', new_col_name]], on='OBS', direction='backward')
-
-# Ensure columns are numeric
-for col in value_columns:
-if col in daily_df.columns:
-daily_df[col] = pd.to_numeric(daily_df[col], errors='coerce').fillna(0)
-else:
-print(f"Column {col} not found in daily_df")
-
-# Aggregate results by week
-ons_df_final = ims_proc.aggregate_daily_to_wc_wide(df=daily_df,
-date_column="OBS",
-group_columns=[],
-sum_columns=value_columns,
-wc=week_commencing,
-aggregation="average")
-
-return ons_df_final
-
-def pull_oecd(self, country: str = "GBR", week_commencing: str = "mon", start_date: str = "1950-01-01") -> pd.DataFrame:
+def pull_oecd(self, country: str = "GBR", week_commencing: str = "mon", start_date: str = "2020-01-01") -> pd.DataFrame:
 """
 Fetch and process time series data from the OECD API.

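The removed pull_ons_data above relied on pandas' merge_asof to spread monthly or quarterly ONS observations onto a daily grid before weekly averaging, and the same back-fill-then-aggregate pattern survives in the newer pull functions. A self-contained sketch of that pattern on synthetic data (the column names and values here are illustrative, not taken from the package):

```python
# Synthetic illustration of the merge_asof(direction='backward') pattern used
# above: each daily row picks up the most recent lower-frequency observation,
# which is then averaged up to week-commencing level.
import pandas as pd

quarterly = pd.DataFrame({
    "OBS": pd.to_datetime(["2023-01-01", "2023-04-01"]),
    "macro_example": [100.0, 102.5],   # illustrative values only
})
daily = pd.DataFrame({"OBS": pd.date_range("2023-01-01", "2023-06-30", freq="D")})

daily = pd.merge_asof(daily, quarterly, on="OBS", direction="backward")
daily["week"] = daily["OBS"] - pd.to_timedelta(daily["OBS"].dt.weekday, unit="D")  # Monday weeks
weekly = daily.groupby("week")["macro_example"].mean().reset_index()
print(weekly.head())
```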
@@ -2235,135 +2122,160 @@ class datapull:

 ############################################################### Seasonality ##########################################################################

-def
+def pull_seasonality(self, week_commencing, start_date, countries):
 # Week commencing dictionary
 day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

-# Create daily date range dataframe
-date_range = pd.date_range(
+# Create daily date range dataframe starting from start_date
+date_range = pd.date_range(
+start=pd.to_datetime(start_date),
+end=datetime.today(),
+freq="D"
+)
 df_daily = pd.DataFrame(date_range, columns=["Date"])
-
-#
-
+
+# ------------------------------------------------
+# 1. Identify "week_start" for each daily row
+# ------------------------------------------------
+df_daily['week_start'] = df_daily["Date"].apply(
+lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
+)
+
+# ------------------------------------------------
+# 2. Build a weekly index (df_weekly_start) with dummy columns
+# ------------------------------------------------
 df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
 df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)

+# Set index to weekly "start of week"
 df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
 df_weekly_start.set_index("Date", inplace=True)
-
+
 # Create individual weekly dummies
 dummy_columns = {}
 for i in range(len(df_weekly_start)):
 col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
 dummy_columns[col_name] = [0] * len(df_weekly_start)
 dummy_columns[col_name][i] = 1
-
+
 df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
 df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)

-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-
-
-
-
-
-# Set date as index
-df_combined.set_index("Date", inplace=True)
-
-# Create COVID lockdown dummies
-lockdown_periods = [
-# Lockdown 1
-("2020-03-23", "2020-05-24"),
-# Lockdown 2
-("2020-11-05", "2020-12-02"),
-# Lockdown 3
-("2021-01-04", "2021-03-08")
-]
-
-df_covid = pd.DataFrame(date_range, columns=["Date"])
-df_covid["national_lockdown"] = 0
-
-for start, end in lockdown_periods:
-df_covid.loc[(df_covid["Date"] >= start) & (df_covid["Date"] <= end), "national_lockdown"] = 1
-
-df_covid['week_start'] = df_covid["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-df_covid.drop("Date", axis=1, inplace=True)
-df_covid.rename(columns={"week_start": "OBS"}, inplace=True)
-df_national_lockdown_total = df_covid.groupby('OBS').sum(numeric_only=True)
-df_national_lockdown_total.rename(columns={"national_lockdown": "covid_uk_national_lockdown_total"}, inplace=True)
-
-df_national_lockdown_1 = df_national_lockdown_total.copy(deep=True)
-df_national_lockdown_2 = df_national_lockdown_total.copy(deep=True)
-df_national_lockdown_3 = df_national_lockdown_total.copy(deep=True)
-
-df_national_lockdown_1.loc[df_national_lockdown_1.index > "2020-05-24"] = 0
-df_national_lockdown_1.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_1"}, inplace=True)
-
-df_national_lockdown_2.loc[df_national_lockdown_2.index < "2020-11-05"] = 0
-df_national_lockdown_2.loc[df_national_lockdown_2.index > "2020-12-02"] = 0
-df_national_lockdown_2.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_2"}, inplace=True)
-
-df_national_lockdown_3.loc[df_national_lockdown_3.index < "2021-01-04"] = 0
-df_national_lockdown_3.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_3"}, inplace=True)
+# ------------------------------------------------
+# 3. Public holidays (daily) and specific holiday columns
+# ------------------------------------------------
+for country in countries:
+country_holidays = holidays.CountryHoliday(
+country,
+years=range(int(start_date[:4]), datetime.today().year + 1)
+)
+# Daily indicator: 1 if that date is a holiday
+df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(
+lambda x: 1 if x in country_holidays else 0
+)
+# Create columns for specific holiday names
+for date_hol, name in country_holidays.items():
+col_name = f"seas_{name.replace(' ', '_').lower()}_{country.lower()}"
+if col_name not in df_daily.columns:
+df_daily[col_name] = 0
+df_daily.loc[df_daily["Date"] == pd.Timestamp(date_hol), col_name] = 1
+
+# ------------------------------------------------
+# 4. Add daily indicators for last day & last Friday of month
+# Then aggregate them to weekly level using .max()
+# ------------------------------------------------
+# Last day of month (daily)
+df_daily["seas_last_day_of_month"] = df_daily["Date"].apply(
+lambda d: 1 if d == d.to_period("M").to_timestamp("M") else 0
+)

-
-df_final_covid.reset_index(inplace=True)
-df_final_covid.rename(columns={"index": "OBS"}, inplace=True)
-
-# Create seasonal indicators for the last day and last Friday of the month
-min_date = '2019-12-29'
-max_date = datetime.today().strftime('%Y-%m-%d')
-date_range_seas = pd.date_range(start=min_date, end=max_date)
-
-df_seas = pd.DataFrame(date_range_seas, columns=['Date'])
-df_seas['Last_Day_of_Month'] = df_seas['Date'].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)
-
+# Last Friday of month (daily)
 def is_last_friday(date):
-
+# last day of the month
+last_day_of_month = date.to_period("M").to_timestamp("M")
 last_day_weekday = last_day_of_month.dayofweek
+# Determine how many days we go back from the last day to get Friday
 if last_day_weekday >= 4:
 days_to_subtract = last_day_weekday - 4
 else:
 days_to_subtract = last_day_weekday + 3
 last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
 return 1 if date == last_friday else 0
+
+df_daily["seas_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
+
+# ------------------------------------------------
+# 5. Weekly aggregation for HOLIDAYS & monthly dummies
+# (Using .max() for holiday indicators so they become binary)
+# ------------------------------------------------
+# For monthly dummies, create a daily col "Month", then get_dummies
+df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
+df_monthly_dummies = pd.get_dummies(
+df_daily,
+prefix="seas",
+columns=["Month"],
+dtype=int
+)
+# Recalculate 'week_start' (already in df_daily, but just to be sure)
+df_monthly_dummies['week_start'] = df_daily['week_start']
+
+# Group monthly dummies by .sum() or .mean()—often we average across the week
+df_monthly_dummies = (
+df_monthly_dummies
+.groupby('week_start')
+.sum(numeric_only=True)  # sum the daily flags
+.reset_index()
+.rename(columns={'week_start': "Date"})
+.set_index("Date")
+)
+# Divide the monthly dummy columns by 7 to spread them across the week
+monthly_cols = [
+c for c in df_monthly_dummies.columns
+if c.startswith("seas_month_")
+]
+df_monthly_dummies[monthly_cols] = df_monthly_dummies[monthly_cols] / 7
+
+# Group holiday columns (and last-day-of-month columns) by .max() => binary
+df_holidays = (
+df_daily
+.groupby('week_start')
+.max(numeric_only=True)  # use max => if any day=1, entire week=1
+.reset_index()
+.rename(columns={'week_start': "Date"})
+.set_index("Date")
+)

-
+# ------------------------------------------------
+# 6. Combine weekly start, monthly dummies, holiday flags
+# ------------------------------------------------
+df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
+df_combined = pd.concat([df_combined, df_holidays], axis=1)
+df_combined = df_combined.loc[:, ~df_combined.columns.duplicated()]
+
+# ------------------------------------------------
+# 7. Create weekly dummies for Week of Year & yearly dummies
+# ------------------------------------------------
+df_combined.reset_index(inplace=True)
+df_combined.rename(columns={"index": "old_index"}, inplace=True)  # just in case

-
-
-df_seas.set_index("Date", inplace=True)
+df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
+df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Week"], dtype=int)

-
-df_combined =
-
-
-
-#
-
+df_combined["Year"] = df_combined["Date"].dt.year
+df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"], dtype=int)
+
+# ------------------------------------------------
+# 8. Add constant & trend
+# ------------------------------------------------
+df_combined["Constant"] = 1
+df_combined["Trend"] = df_combined.index + 1

-
+# ------------------------------------------------
+# 9. Rename Date -> OBS and return
+# ------------------------------------------------
+df_combined.rename(columns={"Date": "OBS"}, inplace=True)
+
+return df_combined

 def pull_weather(self, week_commencing, country) -> pd.DataFrame:
 import pandas as pd
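The rewritten pull_seasonality leans on one piece of date arithmetic throughout: subtracting (weekday - target) % 7 days snaps any date back to the chosen week-commencing day, and the holiday and month flags are then aggregated within those weeks. A small standalone check of that arithmetic, with dates chosen only for illustration:

```python
# Standalone check of the week-commencing arithmetic used in pull_seasonality.
import pandas as pd

day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

def week_start(date: pd.Timestamp, week_commencing: str = "mon") -> pd.Timestamp:
    # Subtract the number of days elapsed since the chosen start day (0-6).
    return date - pd.Timedelta(days=(date.weekday() - day_dict[week_commencing]) % 7)

assert week_start(pd.Timestamp("2024-01-03"), "mon") == pd.Timestamp("2024-01-01")  # Wed -> Mon
assert week_start(pd.Timestamp("2024-01-07"), "sun") == pd.Timestamp("2024-01-07")  # Sun stays put
```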
@@ -2966,4 +2878,240 @@ class datapull:

 final_weather = ims_proc.rename_cols(merged_df, 'seas_')

-return final_weather
+return final_weather
+
+def pull_macro_ons_uk(self, cdid_list=None, week_start_day="mon", sector=None):
+"""
+Fetches time series data for multiple CDIDs from the ONS API, converts it to daily frequency,
+aggregates it to weekly averages, and renames variables based on specified rules.
+
+Parameters:
+cdid_list (list): A list of additional CDIDs to fetch (e.g., ['JP9Z', 'UKPOP']). Defaults to None.
+week_start_day (str): The day the week starts on (e.g., 'Monday', 'Sunday').
+sector (str): The sector for which the standard CDIDs are fetched (e.g., 'fast_food', 'retail').
+
+Returns:
+pd.DataFrame: A DataFrame with weekly frequency, containing a 'week_commencing' column
+and all series as renamed columns.
+"""
+# Define CDIDs for sectors and defaults
+sector_cdids = {
+"fast_food": ["L7TD", "L78Q", "DOAD"],
+"default": ["D7G7", "MGSX", "UKPOP", "IHYQ", "YBEZ", "MS77"],
+}
+
+default_cdids = sector_cdids["default"]
+sector_specific_cdids = sector_cdids.get(sector, [])
+standard_cdids = list(set(default_cdids + sector_specific_cdids))  # Avoid duplicates
+
+# Combine standard CDIDs and additional CDIDs
+if cdid_list is None:
+cdid_list = []
+cdid_list = list(set(standard_cdids + cdid_list))  # Avoid duplicates
+
+base_search_url = "https://api.beta.ons.gov.uk/v1/search?content_type=timeseries&cdids="
+base_data_url = "https://api.beta.ons.gov.uk/v1/data?uri="
+combined_df = pd.DataFrame()
+
+# Map week start day to pandas weekday convention
+days_map = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+if week_start_day not in days_map:
+raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
+week_start = days_map[week_start_day]
+
+for cdid in cdid_list:
+try:
+# Search for the series
+search_url = f"{base_search_url}{cdid}"
+search_response = requests.get(search_url)
+search_response.raise_for_status()
+search_data = search_response.json()
+
+items = search_data.get("items", [])
+if not items:
+print(f"No data found for CDID: {cdid}")
+continue
+
+# Extract series name and latest release URI
+series_name = items[0].get("title", f"Series_{cdid}")
+latest_date = max(
+datetime.fromisoformat(item["release_date"].replace("Z", "+00:00"))
+for item in items if "release_date" in item
+)
+latest_uri = next(
+item["uri"] for item in items
+if "release_date" in item and datetime.fromisoformat(item["release_date"].replace("Z", "+00:00")) == latest_date
+)
+
+# Fetch the dataset
+data_url = f"{base_data_url}{latest_uri}"
+data_response = requests.get(data_url)
+data_response.raise_for_status()
+data_json = data_response.json()
+
+# Detect the frequency and process accordingly
+if "months" in data_json and data_json["months"]:
+frequency_key = "months"
+elif "quarters" in data_json and data_json["quarters"]:
+frequency_key = "quarters"
+elif "years" in data_json and data_json["years"]:
+frequency_key = "years"
+else:
+print(f"Unsupported frequency or no data for CDID: {cdid}")
+continue
+
+# Prepare the DataFrame
+df = pd.DataFrame(data_json[frequency_key])
+
+# Parse the 'date' field based on frequency
+if frequency_key == "months":
+df["date"] = pd.to_datetime(df["date"], format="%Y %b", errors="coerce")
+elif frequency_key == "quarters":
+def parse_quarter(quarter_str):
+year, qtr = quarter_str.split(" Q")
+month = {"1": 1, "2": 4, "3": 7, "4": 10}[qtr]
+return datetime(int(year), month, 1)
+df["date"] = df["date"].apply(parse_quarter)
+elif frequency_key == "years":
+df["date"] = pd.to_datetime(df["date"], format="%Y", errors="coerce")
+
+df["value"] = pd.to_numeric(df["value"], errors="coerce")
+df.rename(columns={"value": series_name}, inplace=True)
+
+# Combine data
+df = df.loc[:, ["date", series_name]].dropna().reset_index(drop=True)
+if combined_df.empty:
+combined_df = df
+else:
+combined_df = pd.merge(combined_df, df, on="date", how="outer")
+
+except requests.exceptions.RequestException as e:
+print(f"Error fetching data for CDID {cdid}: {e}")
+except (KeyError, ValueError) as e:
+print(f"Error processing data for CDID {cdid}: {e}")
+
+if not combined_df.empty:
+min_date = combined_df["date"].min()
+max_date = datetime.today()
+date_range = pd.date_range(start=min_date, end=max_date, freq='D')
+daily_df = pd.DataFrame(date_range, columns=['date'])
+daily_df = pd.merge(daily_df, combined_df, on="date", how="left")
+daily_df = daily_df.ffill()
+
+# Aggregate to weekly frequency
+daily_df["week_commencing"] = daily_df["date"] - pd.to_timedelta((daily_df["date"].dt.weekday - week_start) % 7, unit='D')
+weekly_df = daily_df.groupby("week_commencing").mean(numeric_only=True).reset_index()
+
+def clean_column_name(name):
+name = re.sub(r"\(.*?\)", "", name)
+name = re.split(r":", name)[0]
+name = re.sub(r"\d+", "", name)
+name = re.sub(r"\b(annual|rate)\b", "", name, flags=re.IGNORECASE)
+name = re.sub(r"[^\w\s]", "", name)
+name = name.replace(" ", "_")
+name = re.sub(r"_+", "_", name)
+name = name.rstrip("_")
+return f"macro_{name.lower()}_uk"
+
+weekly_df.columns = [clean_column_name(col) if col != "week_commencing" else col for col in weekly_df.columns]
+weekly_df.rename(columns={"week_commencing": "OBS"}, inplace=True)
+
+weekly_df = weekly_df.fillna(0)
+
+return weekly_df
+else:
+print("No data available to process.")
+return pd.DataFrame()
+
+def pull_yfinance(self, tickers=None, week_start_day="mon"):
+"""
+Fetches stock data for multiple tickers from Yahoo Finance, converts it to daily frequency,
+aggregates it to weekly averages, and renames variables.
+
+Parameters:
+tickers (list): A list of additional stock tickers to fetch (e.g., ['AAPL', 'MSFT']). Defaults to None.
+week_start_day (str): The day the week starts on (e.g., 'Monday', 'Sunday').
+
+Returns:
+pd.DataFrame: A DataFrame with weekly frequency, containing an 'OBS' column
+and aggregated stock data for the specified tickers, with NaN values filled with 0.
+"""
+# Define default tickers
+default_tickers = ["^FTSE", "GBPUSD=X", "GBPEUR=X", "^GSPC"]
+
+# Combine default tickers with additional ones
+if tickers is None:
+tickers = []
+tickers = list(set(default_tickers + tickers))  # Ensure no duplicates
+
+# Automatically set end_date to today
+end_date = datetime.today().strftime("%Y-%m-%d")
+
+# Mapping week start day to pandas weekday convention
+days_map = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+if week_start_day not in days_map:
+raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
+week_start = days_map[week_start_day]
+
+# Fetch data for all tickers without specifying a start date to get all available data
+data = yf.download(tickers, end=end_date, group_by="ticker", auto_adjust=True)
+
+# Process the data
+combined_df = pd.DataFrame()
+for ticker in tickers:
+try:
+# Extract the ticker's data
+ticker_data = data[ticker] if len(tickers) > 1 else data
+ticker_data = ticker_data.reset_index()
+
+# Ensure necessary columns are present
+if "Close" not in ticker_data.columns:
+raise ValueError(f"Ticker {ticker} does not have 'Close' price data.")
+
+# Keep only relevant columns
+ticker_data = ticker_data[["Date", "Close"]]
+ticker_data.rename(columns={"Close": ticker}, inplace=True)
+
+# Merge data
+if combined_df.empty:
+combined_df = ticker_data
+else:
+combined_df = pd.merge(combined_df, ticker_data, on="Date", how="outer")
+
+except KeyError:
+print(f"Data for ticker {ticker} not available.")
+except Exception as e:
+print(f"Error processing ticker {ticker}: {e}")
+
+if not combined_df.empty:
+# Convert to daily frequency
+combined_df["Date"] = pd.to_datetime(combined_df["Date"])
+combined_df.set_index("Date", inplace=True)
+
+# Fill missing dates
+min_date = combined_df.index.min()
+max_date = combined_df.index.max()
+daily_index = pd.date_range(start=min_date, end=max_date, freq='D')
+combined_df = combined_df.reindex(daily_index)
+combined_df.index.name = "Date"
+combined_df = combined_df.ffill()
+
+# Aggregate to weekly frequency
+combined_df["OBS"] = combined_df.index - pd.to_timedelta((combined_df.index.weekday - week_start) % 7, unit="D")
+weekly_df = combined_df.groupby("OBS").mean(numeric_only=True).reset_index()
+
+# Fill NaN values with 0
+weekly_df = weekly_df.fillna(0)
+
+# Clean column names
+def clean_column_name(name):
+name = re.sub(r"[^\w\s]", "", name)
+return f"macro_{name.lower()}"
+
+weekly_df.columns = [clean_column_name(col) if col != "OBS" else col for col in weekly_df.columns]
+
+return weekly_df
+
+else:
+print("No data available to process.")
+return pd.DataFrame()
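The clean_column_name helper added in pull_macro_ons_uk is what turns raw ONS series titles into model-ready column names. A standalone sketch of the same regex pipeline, applied to a hypothetical CPIH-style title to show the effect:

```python
# Standalone sketch of the column-renaming rules pull_macro_ons_uk applies to
# ONS series titles (same regex pipeline as the clean_column_name helper above).
import re

def clean_column_name(name: str) -> str:
    name = re.sub(r"\(.*?\)", "", name)            # drop bracketed qualifiers
    name = re.split(r":", name)[0]                 # keep text before the first colon
    name = re.sub(r"\d+", "", name)                # drop digits
    name = re.sub(r"\b(annual|rate)\b", "", name, flags=re.IGNORECASE)
    name = re.sub(r"[^\w\s]", "", name)            # drop remaining punctuation
    name = name.replace(" ", "_")
    name = re.sub(r"_+", "_", name)                # collapse repeated underscores
    name = name.rstrip("_")
    return f"macro_{name.lower()}_uk"

# Illustrative ONS-style title, shown only to demonstrate the transformation:
print(clean_column_name("CPIH ANNUAL RATE 00: ALL ITEMS 2015=100"))  # -> macro_cpih_uk
```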
{imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/METADATA
CHANGED

@@ -1,10 +1,9 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.3.2
+Version: 0.8.1
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com
-License: MIT
 Keywords: python,data processing,apis
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
@@ -20,93 +19,113 @@ Requires-Dist: fredapi
 Requires-Dist: requests-cache
 Requires-Dist: geopy
 Requires-Dist: bs4
+Requires-Dist: yfinance
+Requires-Dist: holidays

 # IMS Package Documentation

-The
+The **IMSciences package** is a Python library designed to process incoming data into a format tailored for econometrics projects, particularly those utilising weekly time series data. This package offers a suite of functions for efficient data manipulation and analysis.

-
+---
+
+## Key Features
+- Seamless data processing for econometrics workflows.
+- Aggregation, filtering, and transformation of time series data.
+- Integration with external data sources like FRED, Bank of England, ONS and OECD.
+
+---
+
+Table of Contents
+=================

-
+1. `Data Processing <#data-processing>`_
+2. `Data Pulling <#data-pulling>`_
+3. `Installation <#installation>`_
+4. `Usage <#usage>`_
+5. `License <#license>`_
+
+---

-##
+## Data Processing
+
+## 1. get_wd_levels
 - **Description**: Get the working directory with the option of moving up parents.
 - **Usage**: `get_wd_levels(levels)`
 - **Example**: `get_wd_levels(0)`

 ---

-## 2.
+## 2. remove_rows
 - **Description**: Removes a specified number of rows from a pandas DataFrame.
 - **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
 - **Example**: `remove_rows(df, 2)`

 ---

-## 3.
+## 3. aggregate_daily_to_wc_long
 - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
 - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
 - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`

 ---

-## 4.
+## 4. convert_monthly_to_daily
 - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
 - **Usage**: `convert_monthly_to_daily(df, date_column, divide)`
 - **Example**: `convert_monthly_to_daily(df, 'date')`

 ---

-## 5.
+## 5. plot_two
 - **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
 - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
 - **Example**: `plot_two(df1, 'cost', df2, 'cost', 'obs', True)`

 ---

-## 6.
+## 6. remove_nan_rows
 - **Description**: Removes rows from a DataFrame where the specified column has NaN values.
 - **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
 - **Example**: `remove_nan_rows(df, 'date')`

 ---

-## 7.
+## 7. filter_rows
 - **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
 - **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
 - **Example**: `filter_rows(df, 'country', ['UK', 'IE'])`

 ---

-## 8.
+## 8. plot_one
 - **Description**: Plots a specified column from a DataFrame.
 - **Usage**: `plot_one(df1, col1, date_column)`
 - **Example**: `plot_one(df, 'Spend', 'OBS')`

 ---

-## 9.
+## 9. week_of_year_mapping
 - **Description**: Converts a week column in `yyyy-Www` or `yyyy-ww` format to week commencing date.
 - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
 - **Example**: `week_of_year_mapping(df, 'week', 'mon')`

 ---

-## 10.
+## 10. exclude_rows
 - **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
 - **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
 - **Example**: `exclude_rows(df, 'week', ['2022-W20', '2022-W21'])`

 ---

-## 11.
+## 11. rename_cols
 - **Description**: Renames columns in a pandas DataFrame.
 - **Usage**: `rename_cols(df, name)`
 - **Example**: `rename_cols(df, 'ame_facebook')`

 ---

-## 12.
+## 12. merge_new_and_old
 - **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
 - Merges numeric values from specified columns in the old and new DataFrames based on a given cutoff date.
 - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
@@ -114,21 +133,21 @@ The IMS package is a python library for processing incoming data into a format t

 ---

-## 13.
+## 13. merge_dataframes_on_date
 - **Description**: Merge a list of DataFrames on a common column.
 - **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
 - **Example**: `merge_dataframes_on_date([df1, df2, df3], common_column='OBS', merge_how='outer')`

 ---

-## 14.
+## 14. merge_and_update_dfs
 - **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
 - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
 - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`

 ---

-## 15.
+## 15. convert_us_to_uk_dates
 - **Description**: Convert a DataFrame column with mixed date formats to datetime.
 - **Usage**: `convert_us_to_uk_dates(df, date_col)`
 - **Example**: `convert_us_to_uk_dates(df, 'date')`
@@ -142,189 +161,189 @@ The IMS package is a python library for processing incoming data into a format t

 ---

-## 17.
+## 17. pivot_table
 - **Description**: Dynamically pivots a DataFrame based on specified columns.
 - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
 - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1', 'OBS': ' >= datetime(2019,9,9)', 'Metric Short Names': ' == spd'}, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=True, fill_missing_weekly_dates=True, week_commencing='W-MON')`

 ---

-## 18.
+## 18. apply_lookup_table_for_columns
 - **Description**: Equivalent of XLOOKUP in Excel. Allows mapping of a dictionary of substrings within a column.
 - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
 - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd', 'clicks': 'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')`

 ---

-## 19.
+## 19. aggregate_daily_to_wc_wide
 - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
 - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
 - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average', True)`

 ---

-## 20.
+## 20. merge_cols_with_seperator
 - **Description**: Merges multiple columns in a DataFrame into one column with a separator `_`. Useful for lookup tables.
 - **Usage**: `merge_cols_with_seperator(df, col_names, seperator='_', output_column_name='Merged', starting_prefix_str=None, ending_prefix_str=None)`
 - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], seperator='|', output_column_name='Merged Columns', starting_prefix_str='start_', ending_prefix_str='_end')`

 ---

-## 21.
+## 21. check_sum_of_df_cols_are_equal
 - **Description**: Checks if the sum of two columns in two DataFrames are the same, and provides the sums and differences.
 - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
 - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`

 ---

-## 22.
+## 22. convert_2_df_cols_to_dict
 - **Description**: Creates a dictionary using two columns in a DataFrame.
 - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
 - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`

 ---

-## 23.
+## 23. create_FY_and_H_columns
 - **Description**: Creates financial year, half-year, and financial half-year columns.
 - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
 - **Example**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023', short_format='Yes', half_years='Yes', combined_FY_and_H='Yes')`

 ---

-## 24.
+## 24. keyword_lookup_replacement
 - **Description**: Updates chosen values in a specified column of the DataFrame based on a lookup dictionary.
 - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
 - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment', 'product'], qlik_dict_for_channel, output_column_name='Channel New')`

 ---

-## 25.
+## 25. create_new_version_of_col_using_LUT
 - **Description**: Creates a new column in a DataFrame by mapping values from an old column using a lookup table.
 - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
 - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', search_campaign_name_retag_lut, 'Campaign Name New')`

 ---

-## 26.
+## 26. convert_df_wide_2_long
 - **Description**: Converts a DataFrame from wide to long format.
 - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
 - **Example**: `convert_df_wide_2_long(df, ['Media Cost', 'Impressions', 'Clicks'], variable_col_name='Metric')`

 ---

-## 27.
+## 27. manually_edit_data
 - **Description**: Enables manual updates to DataFrame cells by applying filters and editing a column.
 - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
 - **Example**: `manually_edit_data(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'}, 'Master Include', 1, change_in_existing_df_col='Yes', new_col_to_change_name='Master Include', manual_edit_col_name='Manual Changes')`

 ---

-## 28.
+## 28. format_numbers_with_commas
 - **Description**: Formats numeric data into numbers with commas and specified decimal places.
 - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
 - **Example**: `format_numbers_with_commas(df, 1)`

 ---

-## 29.
+## 29. filter_df_on_multiple_conditions
 - **Description**: Filters a DataFrame based on multiple conditions from a dictionary.
 - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
 - **Example**: `filter_df_on_multiple_conditions(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'})`

 ---

-## 30.
+## 30. read_and_concatenate_files
 - **Description**: Reads and concatenates all files of a specified type in a folder.
 - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
 - **Example**: `read_and_concatenate_files(folder_path, file_type='csv')`

 ---

-## 31.
+## 31. remove_zero_values
 - **Description**: Removes rows with zero values in a specified column.
 - **Usage**: `remove_zero_values(data_frame, column_to_filter)`
 - **Example**: `remove_zero_values(df, 'Funeral_Delivery')`

 ---

-## 32.
+## 32. upgrade_outdated_packages
 - **Description**: Upgrades all outdated packages in the environment.
 - **Usage**: `upgrade_outdated_packages()`
 - **Example**: `upgrade_outdated_packages()`

 ---

-## 33.
+## 33. convert_mixed_formats_dates
 - **Description**: Converts a mix of US and UK date formats to datetime.
 - **Usage**: `convert_mixed_formats_dates(df, date_col)`
 - **Example**: `convert_mixed_formats_dates(df, 'OBS')`

 ---

-## 34.
+## 34. fill_weekly_date_range
 - **Description**: Fills in missing weeks with zero values.
 - **Usage**: `fill_weekly_date_range(df, date_column, freq)`
 - **Example**: `fill_weekly_date_range(df, 'OBS', 'W-MON')`

 ---

-## 35.
+## 35. add_prefix_and_suffix
 - **Description**: Adds prefixes and/or suffixes to column headers.
 - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
 - **Example**: `add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')`

 ---

-## 36.
+## 36. create_dummies
 - **Description**: Converts time series into binary indicators based on a threshold.
 - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
 - **Example**: `create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')`

 ---

-## 37.
+## 37. replace_substrings
 - **Description**: Replaces substrings in a column of strings using a dictionary and can change column values to lowercase.
 - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
 - **Example**: `replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')`

 ---

-## 38. `add_total_column
+## 38. `add_total_column
 - **Description**: Sums all columns (excluding a specified column) to create a total column.
 - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
 - **Example**: `add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')`

 ---

-## 39.
+## 39. apply_lookup_table_based_on_substring
 - **Description**: Maps substrings in a column to values using a lookup dictionary.
 - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
 - **Example**: `apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')`

 ---

-## 40.
+## 40. compare_overlap
 - **Description**: Compares matching rows and columns in two DataFrames and outputs the differences.
 - **Usage**: `compare_overlap(df1, df2, date_col)`
 - **Example**: `compare_overlap(df_1, df_2, 'obs')`

 ---

-## 41.
+## 41. week_commencing_2_week_commencing_conversion
 - **Description**: Converts a week commencing column to a different start day.
 - **Usage**: `week_commencing_2_week_commencing_conversion(df, date_col, week_commencing='sun')`
 - **Example**: `week_commencing_2_week_commencing_conversion(df, 'obs', week_commencing='mon')`

 ---

-## 42.
+## 42. plot_chart
 - **Description**: Plots various chart types including line, area, scatter, and bar.
 - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)`
 - **Example**: `plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')`

 ---

-## 43.
+## 43. plot_two_with_common_cols
 - **Description**: Plots charts for two DataFrames based on common column names.
 - **Usage**: `plot_two_with_common_cols(df1, df2, date_column, same_axis=True)`
 - **Example**: `plot_two_with_common_cols(df_1, df_2, date_column='obs')`
@@ -333,51 +352,82 @@ The IMS package is a python library for processing incoming data into a format t

 ## Data Pulling

-## 1.
+## 1. pull_fred_data
 - **Description**: Fetch data from FRED using series ID tokens.
-- **Usage**:
-- **Example**:
+- **Usage**: pull_fred_data(week_commencing, series_id_list)
+- **Example**: pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])

 ---

-## 2.
+## 2. pull_boe_data
 - **Description**: Fetch and process Bank of England interest rate data.
-- **Usage**:
-- **Example**:
+- **Usage**: pull_boe_data(week_commencing)
+- **Example**: pull_boe_data('mon')

 ---

-## 3.
-- **Description**: Fetch and process time series data from the ONS API.
-- **Usage**: `pull_ons_data(series_list, week_commencing)`
-- **Example**: `pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')`
-
----
-
-## 4. `pull_oecd`
+## 3. pull_oecd
 - **Description**: Fetch macroeconomic data from OECD for a specified country.
-- **Usage**:
-- **Example**:
+- **Usage**: pull_oecd(country='GBR', week_commencing='mon', start_date='2020-01-01')
+- **Example**: pull_oecd('GBR', 'mon', '2000-01-01')

 ---

-##
+## 4. get_google_mobility_data
 - **Description**: Fetch Google Mobility data for the specified country.
-- **Usage**:
-- **Example**:
+- **Usage**: get_google_mobility_data(country, wc)
+- **Example**: get_google_mobility_data('United Kingdom', 'mon')

 ---

-##
+## 5. pull_seasonality
 - **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
-- **Usage**:
-- **Example**:
+- **Usage**: pull_seasonality(week_commencing, start_date, countries)
+- **Example**: pull_seasonality('mon', '2020-01-01', ['US', 'GB'])

 ---

-##
+## 6. pull_weather
 - **Description**: Fetch and process historical weather data for the specified country.
-- **Usage**:
-- **Example**:
+- **Usage**: pull_weather(week_commencing, country)
+- **Example**: pull_weather('mon', 'GBR')
+
+---
+
+## 7. pull_macro_ons_uk
+- **Description**: Fetch and process time series data from the Beta ONS API.
+- **Usage**: pull_macro_ons_uk(additional_list, week_commencing, sector)
+- **Example**: pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')
+
+---
+
+## 8. pull_yfinance
+- **Description**: Fetch and process time series data from Yahoo Finance.
+- **Usage**: pull_yfinance(tickers, week_start_day)
+- **Example**: pull_yfinance(['^FTMC', '^IXIC'], 'mon')
+
+## Installation
+
+Install the IMS package via pip:
+
+```bash
+pip install imsciences
+```
+
+---
+
+## Useage
+
+```bash
+from imsciences import *
+ims = dataprocessing()
+ims_pull = datapull()
+```
+
+---
+
+## License
+
+This project is licensed under the MIT License.

 ---
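Putting the README's Installation and Usage sections together, a minimal sketch of an end-to-end call; the method names and arguments come from the documentation above, and the merge step assumes both pulls return the week-commencing 'OBS' column described there:

```python
# Sketch combining the two entry points as the Usage section instantiates them.
# Requires the package to be installed and network access for the pulls.
from imsciences import *

ims = dataprocessing()
ims_pull = datapull()

seasonality = ims_pull.pull_seasonality('mon', '2020-01-01', ['GB'])
rates = ims_pull.pull_boe_data('mon')

# Documented helper: merge a list of DataFrames on a common 'OBS' column.
combined = ims.merge_dataframes_on_date([seasonality, rates], common_column='OBS', merge_how='outer')
```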
{imsciences-0.6.3.2.dist-info → imsciences-0.8.1.dist-info}/RECORD
CHANGED

@@ -1,17 +1,17 @@
 dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences/__init__.py,sha256=
+imsciences/__init__.py,sha256=7CfK2dMjPnBBw6I4st-20MdMlLjZULviFVXF2eMD9NI,80
 imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
-imsciences/datafunctions.py,sha256=
+imsciences/datafunctions.py,sha256=XrvJWWFh9gdKAoeIHee2nYi0Z0zPxmW3oB6ICnGTxYc,158444
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciences/unittesting.py,sha256=d9H5HN8y7oof59hqN9mGqkjulExqFd93BEW-X8w_Id8,58142
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.
-imsciences-0.
-imsciences-0.
-imsciences-0.
-imsciences-0.
+imsciences-0.8.1.dist-info/METADATA,sha256=sJK90uzVkH6KCDVM3hmkbRyGoXNmie8JMoCVLy4J7Fg,17785
+imsciences-0.8.1.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+imsciences-0.8.1.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+imsciences-0.8.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.8.1.dist-info/RECORD,,
File without changes
File without changes
File without changes