imsciences 0.9.5.8__py3-none-any.whl → 0.9.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of imsciences might be problematic. Click here for more details.
- imsciences/mmm.py +24 -1
- imsciences/pull.py +14 -11
- {imsciences-0.9.5.8.dist-info → imsciences-0.9.6.0.dist-info}/METADATA +1 -1
- imsciences-0.9.6.0.dist-info/RECORD +11 -0
- {imsciences-0.9.5.8.dist-info → imsciences-0.9.6.0.dist-info}/WHEEL +1 -1
- dataprocessing/__init__.py +0 -1
- dataprocessing/data-processing-functions.py +0 -2
- dataprocessing/datafunctions.py +0 -2
- imsciences/datafunctions-IMS-24Ltp-3.py +0 -2711
- imsciences/datafunctions.py +0 -3351
- imsciences/datapull.py +0 -374
- imsciences-0.9.5.8.dist-info/PKG-INFO-IMS-24Ltp-3 +0 -24
- imsciences-0.9.5.8.dist-info/RECORD +0 -22
- imsciencesdataprocessing/__init__.py +0 -1
- imsciencesdataprocessing/datafunctions.py +0 -2
- imsdataprocessing/__init__.py +0 -1
- imsdataprocessing/datafunctions.py +0 -2
- {imsciences-0.9.5.8.dist-info → imsciences-0.9.6.0.dist-info}/LICENSE.txt +0 -0
- {imsciences-0.9.5.8.dist-info → imsciences-0.9.6.0.dist-info}/top_level.txt +0 -0
imsciences/mmm.py
CHANGED
|
@@ -1554,4 +1554,27 @@ class dataprocessing:
|
|
|
1554
1554
|
"combined_features": combined_features,
|
|
1555
1555
|
}
|
|
1556
1556
|
|
|
1557
|
-
return output
|
|
1557
|
+
return output
|
|
1558
|
+
|
|
1559
|
+
def quid_pr (self, df):
|
|
1560
|
+
def convert_date(date_str):
|
|
1561
|
+
try:
|
|
1562
|
+
return datetime.strptime(date_str, '%b %d, %Y')
|
|
1563
|
+
except ValueError:
|
|
1564
|
+
return None # Return None if conversion fails
|
|
1565
|
+
# Apply conversion to create new columns
|
|
1566
|
+
df['Start Date'] = df['Earliest Published'].astype(str).apply(convert_date)
|
|
1567
|
+
df['End Date'] = df['Latest Published'].astype(str).apply(convert_date)
|
|
1568
|
+
df['Days Duration'] = (df['End Date'] - df['Start Date']).dt.days + 1 # Ensure inclusive range
|
|
1569
|
+
df['Count per Day'] = df['Published Count'] / df['Days Duration'] # Calculate count per day
|
|
1570
|
+
df['Social Engagement per Day'] = df['Social Engagement'] / df['Days Duration']
|
|
1571
|
+
df['Week Start'] = df['Start Date'].apply(lambda x: x - timedelta(days=x.weekday()) if pd.notnull(x) else None)
|
|
1572
|
+
count_df = df.groupby('Week Start')['Count per Day'].sum().reset_index()
|
|
1573
|
+
total_engagement_per_company = df.groupby('Company (Primary Mention)')['Social Engagement'].sum().reset_index() # Calculates Social Engagement across whole period
|
|
1574
|
+
valid_companies = total_engagement_per_company[total_engagement_per_company['Social Engagement'] > 0]['Company (Primary Mention)'] # Filters out Companies with no Social Engagement
|
|
1575
|
+
social_engagement_df = df[df['Company (Primary Mention)'].isin(valid_companies)].groupby(['Week Start', 'Company (Primary Mention)'])[
|
|
1576
|
+
'Social Engagement'
|
|
1577
|
+
].sum().reset_index()
|
|
1578
|
+
total_social_engagement_df = df.groupby('Week Start')['Social Engagement per Day'].sum().reset_index()
|
|
1579
|
+
|
|
1580
|
+
return count_df, total_social_engagement_df, social_engagement_df
|
imsciences/pull.py
CHANGED
|
@@ -133,7 +133,7 @@ class datapull:
|
|
|
133
133
|
|
|
134
134
|
Args:
|
|
135
135
|
week_commencing (str): The starting day of the week for aggregation.
|
|
136
|
-
Options are "mon", "tue", "wed", "
|
|
136
|
+
Options are "mon", "tue", "wed", "thu", "fri", "sat", "sun".
|
|
137
137
|
Default is "mon".
|
|
138
138
|
max_retries (int): Maximum number of retries to fetch data in case of failure. Default is 5.
|
|
139
139
|
delay (int): Delay in seconds between retry attempts. Default is 5.
|
|
@@ -144,7 +144,7 @@ class datapull:
|
|
|
144
144
|
and 'macro_boe_intr_rate' contains the average interest rate for the week.
|
|
145
145
|
"""
|
|
146
146
|
# Week commencing dictionary
|
|
147
|
-
day_dict = {"mon": 0, "tue": 1, "wed": 2, "
|
|
147
|
+
day_dict = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
148
148
|
|
|
149
149
|
# URL of the Bank of England data page
|
|
150
150
|
url = 'https://www.bankofengland.co.uk/boeapps/database/Bank-Rate.asp'
|
|
@@ -209,7 +209,7 @@ class datapull:
|
|
|
209
209
|
Args:
|
|
210
210
|
country (list): A string containing a 3-letter code of the country of interest (E.g: "GBR", "FRA", "USA", "DEU")
|
|
211
211
|
week_commencing (str): The starting day of the week for aggregation.
|
|
212
|
-
Options are "mon", "tue", "wed", "
|
|
212
|
+
Options are "mon", "tue", "wed", "thu", "fri", "sat", "sun".
|
|
213
213
|
start_date (str): Dataset start date in the format "YYYY-MM-DD"
|
|
214
214
|
|
|
215
215
|
Returns:
|
|
@@ -383,7 +383,7 @@ class datapull:
|
|
|
383
383
|
# ---------------------------------------------------------------------
|
|
384
384
|
# 0. Setup: dictionary for 'week_commencing' to Python weekday() integer
|
|
385
385
|
# ---------------------------------------------------------------------
|
|
386
|
-
day_dict = {"mon": 0, "tue": 1, "wed": 2, "
|
|
386
|
+
day_dict = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
387
387
|
|
|
388
388
|
# ---------------------------------------------------------------------
|
|
389
389
|
# 1. Create daily date range from start_date to today
|
|
@@ -668,7 +668,7 @@ class datapull:
|
|
|
668
668
|
raise ValueError("country_codes must be a list/tuple or a single string.")
|
|
669
669
|
|
|
670
670
|
# --- Setup / Constants --- #
|
|
671
|
-
day_dict = {"mon": 0, "tue": 1, "wed": 2, "
|
|
671
|
+
day_dict = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
672
672
|
# Map each 2-letter code to a key
|
|
673
673
|
country_dict = {
|
|
674
674
|
"US": "US_STATES",
|
|
@@ -1186,6 +1186,9 @@ class datapull:
|
|
|
1186
1186
|
# Define CDIDs for sectors and defaults
|
|
1187
1187
|
sector_cdids = {
|
|
1188
1188
|
"fast_food": ["L7TD", "L78Q", "DOAD"],
|
|
1189
|
+
"clothing_footwear": ["D7BW","D7GO","CHBJ"],
|
|
1190
|
+
"fuel": ["A9FS","L7FP","CHOL"],
|
|
1191
|
+
"cars":["D7E8","D7E9","D7CO"],
|
|
1189
1192
|
"default": ["D7G7", "MGSX", "UKPOP", "IHYQ", "YBEZ", "MS77"],
|
|
1190
1193
|
}
|
|
1191
1194
|
|
|
@@ -1203,7 +1206,7 @@ class datapull:
|
|
|
1203
1206
|
combined_df = pd.DataFrame()
|
|
1204
1207
|
|
|
1205
1208
|
# Map week start day to pandas weekday convention
|
|
1206
|
-
days_map = {"mon": 0, "tue": 1, "wed": 2, "
|
|
1209
|
+
days_map = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
1207
1210
|
if week_start_day not in days_map:
|
|
1208
1211
|
raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
|
|
1209
1212
|
week_start = days_map[week_start_day]
|
|
@@ -1337,7 +1340,7 @@ class datapull:
|
|
|
1337
1340
|
end_date = datetime.today().strftime("%Y-%m-%d")
|
|
1338
1341
|
|
|
1339
1342
|
# Mapping week start day to pandas weekday convention
|
|
1340
|
-
days_map = {"mon": 0, "tue": 1, "wed": 2, "
|
|
1343
|
+
days_map = {"mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6}
|
|
1341
1344
|
if week_start_day not in days_map:
|
|
1342
1345
|
raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
|
|
1343
1346
|
week_start = days_map[week_start_day]
|
|
@@ -1497,9 +1500,9 @@ class datapull:
|
|
|
1497
1500
|
# Aggregate by week commencing
|
|
1498
1501
|
day_offsets = {
|
|
1499
1502
|
'mon': 'W-MON',
|
|
1500
|
-
'
|
|
1503
|
+
'tue': 'W-TUE',
|
|
1501
1504
|
'wed': 'W-WED',
|
|
1502
|
-
'
|
|
1505
|
+
'thu': 'W-THU',
|
|
1503
1506
|
'fri': 'W-FRI',
|
|
1504
1507
|
'sat': 'W-SAT',
|
|
1505
1508
|
'sun': 'W-SUN'
|
|
@@ -1592,9 +1595,9 @@ class datapull:
|
|
|
1592
1595
|
# Resample by week
|
|
1593
1596
|
day_offsets = {
|
|
1594
1597
|
'mon': 'W-MON',
|
|
1595
|
-
'
|
|
1598
|
+
'tue': 'W-TUE',
|
|
1596
1599
|
'wed': 'W-WED',
|
|
1597
|
-
'
|
|
1600
|
+
'thu': 'W-THU',
|
|
1598
1601
|
'fri': 'W-FRI',
|
|
1599
1602
|
'sat': 'W-SAT',
|
|
1600
1603
|
'sun': 'W-SUN'
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
|
|
2
|
+
imsciences/geo.py,sha256=eenng7_BP_E2WD5Wt1G_oNxQS8W3t6lycRwJ91ngysY,15808
|
|
3
|
+
imsciences/mmm.py,sha256=qMh0ccOepehfCcux7EeG8cq6piSEoFEz5iiJbDBWOS4,82214
|
|
4
|
+
imsciences/pull.py,sha256=B05cjuWCihFfZp8pyO118QYHJiASsWn94s1o5hd1n1Q,81788
|
|
5
|
+
imsciences/unittesting.py,sha256=U177_Txg0Lqn49zYRu5bl9OVe_X7MkNJ6V_Zd6DHOsU,45656
|
|
6
|
+
imsciences/vis.py,sha256=2izdHQhmWEReerRqIxhY4Ai10VjL7xoUqyWyZC7-2XI,8931
|
|
7
|
+
imsciences-0.9.6.0.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
|
|
8
|
+
imsciences-0.9.6.0.dist-info/METADATA,sha256=Khfs0zUye-2GAdswojmCutDo3JBq2OF0fEjuK0pkBR4,18846
|
|
9
|
+
imsciences-0.9.6.0.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
|
10
|
+
imsciences-0.9.6.0.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
11
|
+
imsciences-0.9.6.0.dist-info/RECORD,,
|
dataprocessing/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .datafunctions import hello
|
dataprocessing/datafunctions.py
DELETED