imsciences 0.9.5.9__py3-none-any.whl → 0.9.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/pull.py +14 -11
- {imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/METADATA +1 -1
- imsciences-0.9.6.2.dist-info/RECORD +11 -0
- dataprocessing/__init__.py +0 -1
- dataprocessing/data-processing-functions.py +0 -2
- dataprocessing/datafunctions.py +0 -2
- imsciences/datafunctions-IMS-24Ltp-3.py +0 -2711
- imsciences/datafunctions.py +0 -3351
- imsciences/datapull.py +0 -374
- imsciences-0.9.5.9.dist-info/PKG-INFO-IMS-24Ltp-3 +0 -24
- imsciences-0.9.5.9.dist-info/RECORD +0 -22
- imsciencesdataprocessing/__init__.py +0 -1
- imsciencesdataprocessing/datafunctions.py +0 -2
- imsdataprocessing/__init__.py +0 -1
- imsdataprocessing/datafunctions.py +0 -2
- {imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/LICENSE.txt +0 -0
- {imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/WHEEL +0 -0
- {imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/top_level.txt +0 -0
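The file-level changes above can be reproduced locally. The sketch below is illustrative only: it assumes pip is on PATH and that both versions are still downloadable from the public index, it compares wheel member names rather than file contents, and the `wheel_diff` directory name is an arbitrary choice.

import subprocess
import zipfile
from pathlib import Path

VERSIONS = ["0.9.5.9", "0.9.6.2"]
workdir = Path("wheel_diff")

for version in VERSIONS:
    # Fetch only the wheel for this version, without resolving dependencies.
    (workdir / version).mkdir(parents=True, exist_ok=True)
    subprocess.run(
        ["pip", "download", f"imsciences=={version}", "--no-deps",
         "--only-binary", ":all:", "-d", str(workdir / version)],
        check=True,
    )

def members(version: str) -> set[str]:
    # Read the member file names out of the downloaded wheel (a zip archive).
    wheel_path = next((workdir / version).glob("*.whl"))
    with zipfile.ZipFile(wheel_path) as wheel:
        return set(wheel.namelist())

old, new = members("0.9.5.9"), members("0.9.6.2")
print("removed:", sorted(old - new))
print("added:  ", sorted(new - old))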
imsciences/datapull.py
DELETED
@@ -1,374 +0,0 @@
import pandas as pd
import calendar
import requests
import os
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import datetime
import re
import pandas as pd
from imsciences import *
from fredapi import Fred
import time
from datetime import datetime
from datafunctions import dataprocessing

class datapull:

    def pull_help(self):
        print("This is the help section. The functions in the package are as follows:")

        print("\n1. pull_fred_data")
        print(" - Description: Get data from FRED by using series id tokens.")
        print(" - Usage: pull_fred_data(week_commencing, series_id_list)")
        print(" - Example: pull_fred_data('sun', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])")

    ############################################################### MACRO ##########################################################################

    def pull_fred_data(self, week_commencing: str = 'mon', series_id_list: list[str] = ["GPDIC1", "Y057RX1Q020SBEA", "GCEC1", "ND000333Q", "Y006RX1Q020SBEA"]) -> pd.DataFrame:
        '''
        Parameters
        ----------
        week_commencing : str
            specify the day for the week commencing, the default is 'sun' (e.g., 'mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')

        series_id_list : list[str]
            provide a list with IDs to download data series from FRED (link: https://fred.stlouisfed.org/tags/series?t=id). Default list is
            ["GPDIC1", "Y057RX1Q020SBEA", "GCEC1", "ND000333Q", "Y006RX1Q020SBEA"]

        Returns
        ----------
        pd.DataFrame
            Return a data frame with FRED data according to the series IDs provided

        Example
        ----------
        pull_fred_data("mon", ["GCEC1", "SP500"])
        '''
        # Fred API
        fred = Fred(api_key='76f5f8156145fdb8fbaf66f1eb944f8a')

        # Fetch the metadata for each series to get the full names
        series_names = {series_id: fred.get_series_info(series_id).title for series_id in series_id_list}

        # Download data from series id list
        fred_series = {series_id: fred.get_series(series_id) for series_id in series_id_list}

        # Data processing
        date_range = {'OBS': pd.date_range("1950-01-01", datetime.today().strftime('%Y-%m-%d'), freq='d')}
        fred_series_df = pd.DataFrame(date_range)

        for series_id, series_data in fred_series.items():
            series_data = series_data.reset_index()
            series_data.columns = ['OBS', series_names[series_id]]  # Use the series name as the column header
            fred_series_df = pd.merge_asof(fred_series_df, series_data, on='OBS', direction='backward')

        # Handle duplicate columns
        for col in fred_series_df.columns:
            if '_x' in col:
                base_col = col.replace('_x', '')
                fred_series_df[base_col] = fred_series_df[col].combine_first(fred_series_df[base_col + '_y'])
                fred_series_df.drop([col, base_col + '_y'], axis=1, inplace=True)

        # Ensure sum_columns are present in the DataFrame
        sum_columns = [series_names[series_id] for series_id in series_id_list if series_names[series_id] in fred_series_df.columns]

        # Aggregate results by week
        fred_df_final = dataprocessing.aggregate_daily_to_wc_wide(self, df=fred_series_df,
                                                                  date_column="OBS",
                                                                  group_columns=[],
                                                                  sum_columns=sum_columns,
                                                                  wc=week_commencing,
                                                                  aggregation="average")

        # Remove anything after the instance of any ':' in the column names and rename, except for 'OBS'
        fred_df_final.columns = ['OBS' if col == 'OBS' else 'macro_' + col.lower().split(':')[0].replace(' ', '_') for col in fred_df_final.columns]

        return fred_df_final

    def pull_boe_data(self, week_commencing="mon", max_retries=30, delay=5):
        """
        Fetch and process Bank of England interest rate data.

        Args:
            week_commencing (str): The starting day of the week for aggregation.
                Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".
                Default is "sun".
            max_retries (int): Maximum number of retries to fetch data in case of failure. Default is 30.
            delay (int): Delay in seconds between retry attempts. Default is 5.

        Returns:
            pd.DataFrame: A DataFrame with weekly aggregated Bank of England interest rates.
                The 'OBS' column contains the week commencing dates in 'dd/mm/yyyy' format
                and 'macro_boe_intr_rate' contains the average interest rate for the week.
        """
        # Week commencing dictionary
        day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

        # Function to fetch the data with retries
        def fetch_data_with_retries(url, max_retries, delay):
            for attempt in range(max_retries):
                try:
                    html_table = pd.read_html(url)[0]
                    return html_table
                except Exception as e:
                    print(f"Attempt {attempt + 1} failed: {e}")
                    if attempt < max_retries - 1:
                        time.sleep(delay)
                    else:
                        raise

        # Import HTML data from Bank of England rate
        url = 'https://www.bankofengland.co.uk/boeapps/database/Bank-Rate.asp'
        html_table = fetch_data_with_retries(url, max_retries, delay)

        df = pd.DataFrame(html_table)
        df.rename(columns={"Date Changed": "OBS", "Rate": "macro_boe_intr_rate"}, inplace=True)

        # Change date column to datetime and find the corresponding week to the date
        df["OBS"] = pd.to_datetime(df["OBS"], format="%d %b %y")
        df.sort_values("OBS", axis=0, inplace=True)

        # Create a daily date range and find the week commencing for that day
        date_range = pd.date_range(df["OBS"].iloc[0], datetime.today(), freq="d")
        df_daily = pd.DataFrame(date_range, columns=["OBS"])

        # Adjust each date to the specified week commencing day
        df_daily['Week_Commencing'] = df_daily["OBS"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))

        # Outer merge the daily date range on the boe dataframe and forward fill in the blanks
        df_final = df_daily.merge(df, on='OBS', how="left")
        df_final["macro_boe_intr_rate"].ffill(inplace=True)

        # Group by the week start date and get the mean of the interest rates for each week
        df_final = df_final.groupby('Week_Commencing')['macro_boe_intr_rate'].mean().reset_index()

        df_final['Week_Commencing'] = df_final['Week_Commencing'].dt.strftime('%d/%m/%Y')
        df_final.rename(columns={'Week_Commencing': 'OBS'}, inplace=True)

        return df_final

    def pull_ons_data(self, series_list, week_commencing):
        """
        Fetch and process time series data from the ONS API.

        Args:
            series_list (list): A list of dictionaries where each dictionary represents a time series.
                Each dictionary should have the keys 'series_id' and 'dataset_id'.
            week_commencing (str): The starting day of the week for aggregation.
                Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".

        Returns:
            pd.DataFrame: A DataFrame with weekly aggregated ONS data. The 'OBS' column contains the week
                commencing dates and other columns contain the aggregated time series values.
        """
        # Generate a date range from 1950-01-01 to today
        date_range = pd.date_range(start="1950-01-01", end=datetime.today(), freq='D')
        daily_df = pd.DataFrame(date_range, columns=['OBS'])

        # Keep track of the renamed value columns
        value_columns = []

        for series in series_list:
            series_id = series['series_id']
            dataset_id = series['dataset_id']

            # Construct the URL for data
            data_url = f"https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data"

            # Make the request to the ONS API for data
            data_response = requests.get(data_url)

            # Check if the request was successful
            if data_response.status_code != 200:
                print(f"Failed to fetch data for series {series_id}: {data_response.status_code} {data_response.text}")
                continue

            # Parse the JSON response for data
            data = data_response.json()

            # Attempt to extract the name of the time series from the data response
            series_name = data.get('description', {}).get('title', 'Value')

            # Determine the most granular time series data available
            if 'months' in data and data['months']:
                time_series_data = data['months']
            elif 'quarters' in data and data['quarters']:
                time_series_data = data['quarters']
            elif 'years' in data and data['years']:
                time_series_data = data['years']
            else:
                print("No time series data found in the response")
                continue

            # Create a DataFrame from the time series data
            df = pd.DataFrame(time_series_data)

            # Handle different frequencies in the data
            if 'date' in df.columns:
                if any(df['date'].str.contains('Q')):
                    df['date'] = pd.PeriodIndex(df['date'], freq='Q').to_timestamp()
                else:
                    df['date'] = pd.to_datetime(df['date'])

            df = df.rename(columns={'date': 'OBS', 'value': series_name})

            # Rename the value column
            new_col_name = 'macro_' + series_name.lower().replace(':', '').replace(' ', '_').replace('-', '_')
            df = df.rename(columns={series_name: new_col_name})

            # Track the renamed value column
            value_columns.append(new_col_name)

            # Merge the data based on the observation date
            daily_df = pd.merge_asof(daily_df, df[['OBS', new_col_name]], on='OBS', direction='backward')

        # Ensure columns are numeric
        for col in value_columns:
            if col in daily_df.columns:
                daily_df[col] = pd.to_numeric(daily_df[col], errors='coerce').fillna(0)
            else:
                print(f"Column {col} not found in daily_df")

        # Aggregate results by week
        ons_df_final = dataprocessing.aggregate_daily_to_wc_wide(self, df=daily_df,
                                                                 date_column="OBS",
                                                                 group_columns=[],
                                                                 sum_columns=value_columns,
                                                                 wc=week_commencing,
                                                                 aggregation="average")

        return ons_df_final

    ############################################################### Seasonality ##########################################################################

    def pull_combined_dummies(self, week_commencing):
        # Week commencing dictionary
        day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

        # Create daily date range dataframe
        date_range = pd.date_range(datetime.datetime(2015, 1, 1), datetime.date.today(), freq="d")
        df_daily = pd.DataFrame(date_range, columns=["Date"])

        # Create weekly date range dataframe
        df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
        df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
        df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)

        df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
        df_weekly_start.set_index("Date", inplace=True)

        # Create individual weekly dummies
        dummy_columns = {}
        for i in range(len(df_weekly_start)):
            col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
            dummy_columns[col_name] = [0] * len(df_weekly_start)
            dummy_columns[col_name][i] = 1

        df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
        df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)

        # Create monthly dummies
        df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
        df_monthly_dummies = pd.get_dummies(df_daily, prefix="seas", columns=["Month"])
        df_monthly_dummies['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
        df_monthly_dummies = df_monthly_dummies.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})

        df_monthly_dummies.set_index("Date", inplace=True)
        df_monthly_dummies = df_monthly_dummies / 7

        # Combine weekly and monthly dataframes
        df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)

        # Create weekly dummies
        df_combined.reset_index(inplace=True)
        df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
        df_combined = pd.get_dummies(df_combined, prefix="wk", columns=["Week"])

        # Create yearly dummies
        df_combined["Year"] = df_combined["Date"].dt.year
        df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"])

        # Add constant
        df_combined["Constant"] = 1

        # Add trend
        df_combined["Trend"] = df_combined.index + 1

        # Set date as index
        df_combined.set_index("Date", inplace=True)

        # Create COVID lockdown dummies
        lockdown_periods = [
            # Lockdown 1
            ("2020-03-23", "2020-05-24"),
            # Lockdown 2
            ("2020-11-05", "2020-12-02"),
            # Lockdown 3
            ("2021-01-04", "2021-03-08")
        ]

        df_covid = pd.DataFrame(date_range, columns=["Date"])
        df_covid["national_lockdown"] = 0

        for start, end in lockdown_periods:
            df_covid.loc[(df_covid["Date"] >= start) & (df_covid["Date"] <= end), "national_lockdown"] = 1

        df_covid['week_start'] = df_covid["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
        df_covid.drop("Date", axis=1, inplace=True)
        df_covid.rename(columns={"week_start": "OBS"}, inplace=True)
        df_national_lockdown_total = df_covid.groupby('OBS').sum(numeric_only=True)
        df_national_lockdown_total.rename(columns={"national_lockdown": "covid_national_lockdown_total"}, inplace=True)

        df_national_lockdown_1 = df_national_lockdown_total.copy(deep=True)
        df_national_lockdown_2 = df_national_lockdown_total.copy(deep=True)
        df_national_lockdown_3 = df_national_lockdown_total.copy(deep=True)

        df_national_lockdown_1.loc[df_national_lockdown_1.index > "2020-05-24"] = 0
        df_national_lockdown_1.rename(columns={"covid_national_lockdown_total": "covid_national_lockdown_1"}, inplace=True)

        df_national_lockdown_2.loc[df_national_lockdown_2.index < "2020-11-05"] = 0
        df_national_lockdown_2.loc[df_national_lockdown_2.index > "2020-12-02"] = 0
        df_national_lockdown_2.rename(columns={"covid_national_lockdown_total": "covid_national_lockdown_2"}, inplace=True)

        df_national_lockdown_3.loc[df_national_lockdown_3.index < "2021-01-04"] = 0
        df_national_lockdown_3.rename(columns={"covid_national_lockdown_total": "covid_national_lockdown_3"}, inplace=True)

        df_final_covid = pd.concat([df_national_lockdown_total, df_national_lockdown_1, df_national_lockdown_2, df_national_lockdown_3], axis=1)
        df_final_covid.reset_index(inplace=True)
        df_final_covid.rename(columns={"index": "OBS"}, inplace=True)

        # Create seasonal indicators for the last day and last Friday of the month
        min_date = '2019-12-29'
        max_date = datetime.date.today().strftime('%Y-%m-%d')
        date_range_seas = pd.date_range(start=min_date, end=max_date)

        df_seas = pd.DataFrame(date_range_seas, columns=['Date'])
        df_seas['Last_Day_of_Month'] = df_seas['Date'].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)

        def is_last_friday(date):
            last_day_of_month = date.to_period('M').to_timestamp('M')
            last_day_weekday = last_day_of_month.dayofweek
            if last_day_weekday >= 4:
                days_to_subtract = last_day_weekday - 4
            else:
                days_to_subtract = last_day_weekday + 3
            last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
            return 1 if date == last_friday else 0

        df_seas['Last_Friday_of_Month'] = df_seas['Date'].apply(is_last_friday)

        df_seas['week_start'] = df_seas["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
        df_seas = df_seas.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
        df_seas.set_index("Date", inplace=True)

        # Combine all dataframes
        df_combined = df_combined.reset_index().rename(columns={"Date": "OBS"})
        df_final_combined = pd.merge(df_combined, df_final_covid, how='left', left_on='OBS', right_on='OBS')
        df_final_combined = pd.merge(df_final_combined, df_seas, how='left', left_on='OBS', right_on='Date')

        # Fill any NaN values with 0
        df_final_combined.fillna(0, inplace=True)

        return df_final_combined
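For context, the removed module exposed a single `datapull` class. Based on the docstrings above, it was used roughly as follows; this is a sketch only, the ONS `series_id`/`dataset_id` pair is a hypothetical placeholder, and the import path reflects the 0.9.5.9 layout.

from imsciences.datapull import datapull  # module removed in 0.9.6.2

puller = datapull()

# Weekly-aggregated FRED series (example taken from the pull_fred_data docstring).
fred_df = puller.pull_fred_data("mon", ["GCEC1", "SP500"])

# Bank of England base rate, averaged by week commencing Monday.
boe_df = puller.pull_boe_data(week_commencing="mon")

# ONS time series; each entry needs 'series_id' and 'dataset_id' keys
# (the pair below is a placeholder, not a known-valid series).
ons_df = puller.pull_ons_data(
    series_list=[{"series_id": "ABCD", "dataset_id": "EFGH"}],
    week_commencing="mon",
)

# Weekly seasonal, COVID-lockdown, constant and trend dummies.
dummies_df = puller.pull_combined_dummies(week_commencing="mon")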
imsciences-0.9.5.9.dist-info/PKG-INFO-IMS-24Ltp-3
DELETED
@@ -1,24 +0,0 @@
Metadata-Version: 2.1
Name: imsciences
Version: 0.6.1.1
Summary: IMS Data Processing Package
Author: IMS
Author-email: cam@im-sciences.com
Keywords: python,data processing
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: Unix
Classifier: Operating System :: MacOS :: MacOS X
Classifier: Operating System :: Microsoft :: Windows
Description-Content-Type: text/markdown
Requires-Dist: pandas

# IMS Package Documentation

The IMS package is a python library for processing incoming data into a format that can be used for projects. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:

## Data Processing

## Data Pulling

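The fields in the deleted PKG-INFO above are standard core-metadata fields and can be read at runtime with the standard library; a minimal sketch, assuming the package is installed in the current environment:

from importlib.metadata import metadata

# Returns an email.Message-style mapping of the core metadata fields.
meta = metadata("imsciences")
print(meta["Name"], meta["Version"])
print(meta["Summary"])
print(meta.get_all("Classifier"))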
imsciences-0.9.5.9.dist-info/RECORD
DELETED
@@ -1,22 +0,0 @@
dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
imsciences/datafunctions.py,sha256=WZrXNLO-SYrCuFt0pAbha74psMOZPY7meWJ7yWEbRpk,169953
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
imsciences/geo.py,sha256=eenng7_BP_E2WD5Wt1G_oNxQS8W3t6lycRwJ91ngysY,15808
imsciences/mmm.py,sha256=qMh0ccOepehfCcux7EeG8cq6piSEoFEz5iiJbDBWOS4,82214
imsciences/pull.py,sha256=8j4k9hnQ9IuxY8W_PvO6afutPm4Pz_SJRjZfU47qxX0,81654
imsciences/unittesting.py,sha256=U177_Txg0Lqn49zYRu5bl9OVe_X7MkNJ6V_Zd6DHOsU,45656
imsciences/vis.py,sha256=2izdHQhmWEReerRqIxhY4Ai10VjL7xoUqyWyZC7-2XI,8931
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
imsciences-0.9.5.9.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
imsciences-0.9.5.9.dist-info/METADATA,sha256=3sC4oZfBBk5try5pxgpcPlFXb7RLEx6xgTbLpVdQB7c,18846
imsciences-0.9.5.9.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
imsciences-0.9.5.9.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
imsciences-0.9.5.9.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
imsciences-0.9.5.9.dist-info/RECORD,,
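Each RECORD line above has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe base64 encoding of the file's SHA-256 hash with trailing padding stripped. A minimal sketch of computing one such entry (the path is an illustrative local file):

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Hash the file and encode the digest the way wheel RECORD files do.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode('ascii')},{len(data)}"

print(record_entry("imsciences/datapull.py"))  # hypothetical local checkout path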
@@ -1 +0,0 @@
from .datafunctions import hello
imsdataprocessing/__init__.py
DELETED
@@ -1 +0,0 @@
from .datafunctions import hello
{imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/LICENSE.txt
File without changes

{imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/WHEEL
File without changes

{imsciences-0.9.5.9.dist-info → imsciences-0.9.6.2.dist-info}/top_level.txt
File without changes