imsciences 0.9.6.5__tar.gz → 0.9.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/PKG-INFO +1 -1
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/pull.py +34 -11
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/setup.py +1 -1
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/LICENSE.txt +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/README.md +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/__init__.py +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/geo.py +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/mmm.py +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/unittesting.py +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences/vis.py +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/PKG-INFO-TomG-HP-290722 +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.9.6.5 → imsciences-0.9.6.7}/setup.cfg +0 -0
|
@@ -379,7 +379,7 @@ class datapull:
|
|
|
379
379
|
|
|
380
380
|
############################################################### Seasonality ##########################################################################
|
|
381
381
|
|
|
382
|
-
def pull_seasonality(
|
|
382
|
+
def pull_seasonality(week_commencing, start_date, countries):
|
|
383
383
|
"""
|
|
384
384
|
Generates a DataFrame with weekly seasonality features.
|
|
385
385
|
|
|
@@ -597,7 +597,7 @@ class datapull:
|
|
|
597
597
|
|
|
598
598
|
|
|
599
599
|
# ---------------------------------------------------------------------
|
|
600
|
-
# 4. Add daily indicators for last day & last Friday of month
|
|
600
|
+
# 4. Add daily indicators for last day & last Friday of month & payday
|
|
601
601
|
# ---------------------------------------------------------------------
|
|
602
602
|
df_daily["is_last_day_of_month"] = df_daily["Date"].dt.is_month_end
|
|
603
603
|
|
|
@@ -608,22 +608,27 @@ class datapull:
|
|
|
608
608
|
# Check if next Friday is in the next month
|
|
609
609
|
next_friday = date + timedelta(days=7)
|
|
610
610
|
return 1 if next_friday.month != date.month else 0
|
|
611
|
+
|
|
612
|
+
def is_payday(date):
|
|
613
|
+
return 1 if date.day >= 25 else 0
|
|
611
614
|
|
|
612
615
|
df_daily["is_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
|
|
613
|
-
|
|
616
|
+
|
|
617
|
+
df_daily["is_payday"] = df_daily["Date"].apply(is_payday)
|
|
618
|
+
|
|
614
619
|
# Rename with the 'seas_' prefix for clarity
|
|
615
620
|
df_daily.rename(columns={
|
|
616
621
|
"is_last_day_of_month": "seas_last_day_of_month",
|
|
617
|
-
"is_last_friday_of_month": "seas_last_friday_of_month"
|
|
622
|
+
"is_last_friday_of_month": "seas_last_friday_of_month",
|
|
623
|
+
"is_payday": "seas_payday"
|
|
618
624
|
}, inplace=True)
|
|
619
625
|
|
|
620
|
-
|
|
621
626
|
# ---------------------------------------------------------------------
|
|
622
627
|
# 5. Weekly aggregation
|
|
623
628
|
# ---------------------------------------------------------------------
|
|
624
629
|
|
|
625
630
|
# Select only columns that are indicators/flags (intended for max aggregation)
|
|
626
|
-
flag_cols = [col for col in df_daily.columns if col.startswith('seas_') or col.startswith('is_')]
|
|
631
|
+
flag_cols = [col for col in df_daily.columns if (col.startswith('seas_') or col.startswith('is_')) and col != "seas_payday"]
|
|
627
632
|
# Ensure 'week_start' is present for grouping
|
|
628
633
|
df_to_agg = df_daily[['week_start'] + flag_cols]
|
|
629
634
|
|
|
@@ -635,7 +640,26 @@ class datapull:
|
|
|
635
640
|
.rename(columns={'week_start': "Date"})
|
|
636
641
|
.set_index("Date")
|
|
637
642
|
)
|
|
643
|
+
|
|
644
|
+
# Do specific aggregation for payday
|
|
645
|
+
# Derive 'month' and 'year' columns from 'Date' in df_daily for the per-month payday totals
|
|
646
|
+
df_daily["month"] = df_daily["Date"].dt.month
|
|
647
|
+
df_daily["year"] = df_daily["Date"].dt.year
|
|
648
|
+
|
|
649
|
+
# Sum of seas_payday flags per week
|
|
650
|
+
week_payday_sum = df_daily.groupby("week_start")["seas_payday"].sum()
|
|
638
651
|
|
|
652
|
+
# Divide each week's payday-flag count by the total payday flags in the month of that week's first row
|
|
653
|
+
payday_days_in_month = (
|
|
654
|
+
df_daily.groupby(["year", "month"])["seas_payday"].sum()
|
|
655
|
+
)
|
|
656
|
+
week_month = df_daily.groupby("week_start").first()[["month", "year"]]
|
|
657
|
+
week_days_in_month = week_month.apply(lambda row: payday_days_in_month.loc[(row["year"], row["month"])], axis=1)
|
|
658
|
+
df_weekly_flags["seas_payday"] = (week_payday_sum / week_days_in_month).fillna(0).values
|
|
659
|
+
|
|
660
|
+
# # Drop intermediate columns
|
|
661
|
+
# df_weekly_flags = df_weekly_flags.drop(columns=["month", "year"])
|
|
662
|
+
|
|
639
663
|
# --- Aggregate Week Number using MODE ---
|
|
640
664
|
# Define aggregation function for mode (handling potential multi-modal cases by taking the first)
|
|
641
665
|
def get_mode(x):
|
|
@@ -678,7 +702,6 @@ class datapull:
|
|
|
678
702
|
df_weekly_monthly_dummies.rename(columns={'week_start': 'Date'}, inplace=True)
|
|
679
703
|
df_weekly_monthly_dummies.set_index('Date', inplace=True)
|
|
680
704
|
|
|
681
|
-
|
|
682
705
|
# ---------------------------------------------------------------------
|
|
683
706
|
# 6. Combine all weekly components
|
|
684
707
|
# ---------------------------------------------------------------------
|
|
@@ -697,15 +720,15 @@ class datapull:
|
|
|
697
720
|
|
|
698
721
|
# Ensure correct types for flag columns (int)
|
|
699
722
|
for col in df_weekly_flags.columns:
|
|
700
|
-
if col
|
|
701
|
-
|
|
723
|
+
if col != 'seas_payday':
|
|
724
|
+
if col in df_combined.columns:
|
|
725
|
+
df_combined[col] = df_combined[col].astype(int)
|
|
702
726
|
|
|
703
727
|
# Ensure correct types for month columns (float)
|
|
704
728
|
for col in df_weekly_monthly_dummies.columns:
|
|
705
729
|
if col in df_combined.columns:
|
|
706
730
|
df_combined[col] = df_combined[col].astype(float)
|
|
707
731
|
|
|
708
|
-
|
|
709
732
|
# ---------------------------------------------------------------------
|
|
710
733
|
# 7. Create weekly dummies for Week of Year & yearly dummies from aggregated cols
|
|
711
734
|
# ---------------------------------------------------------------------
|
|
@@ -737,7 +760,7 @@ class datapull:
|
|
|
737
760
|
# Filter out columns not in the desired order list (handles case where dum_ cols are off)
|
|
738
761
|
final_cols = [col for col in cols_order if col in df_combined.columns]
|
|
739
762
|
df_combined = df_combined[final_cols]
|
|
740
|
-
|
|
763
|
+
|
|
741
764
|
return df_combined
|
|
742
765
|
|
|
743
766
|
def pull_weather(self, week_commencing, start_date, country_codes) -> pd.DataFrame:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|