imsciences 0.9.6.5__py3-none-any.whl → 0.9.6.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of imsciences might be problematic. Click here for more details.

imsciences/pull.py CHANGED
@@ -379,7 +379,7 @@ class datapull:
379
379
 
380
380
  ############################################################### Seasonality ##########################################################################
381
381
 
382
- def pull_seasonality(self, week_commencing, start_date, countries):
382
+ def pull_seasonality(week_commencing, start_date, countries):
383
383
  """
384
384
  Generates a DataFrame with weekly seasonality features.
385
385
 
@@ -597,7 +597,7 @@ class datapull:
597
597
 
598
598
 
599
599
  # ---------------------------------------------------------------------
600
- # 4. Add daily indicators for last day & last Friday of month
600
+ # 4. Add daily indicators for last day & last Friday of month & payday
601
601
  # ---------------------------------------------------------------------
602
602
  df_daily["is_last_day_of_month"] = df_daily["Date"].dt.is_month_end
603
603
 
@@ -608,22 +608,27 @@ class datapull:
608
608
  # Check if next Friday is in the next month
609
609
  next_friday = date + timedelta(days=7)
610
610
  return 1 if next_friday.month != date.month else 0
611
+
612
+ def is_payday(date):
613
+ return 1 if date.day >= 25 else 0
611
614
 
612
615
  df_daily["is_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
613
-
616
+
617
+ df_daily["is_payday"] = df_daily["Date"].apply(is_payday)
618
+
614
619
  # Rename for clarity prefix
615
620
  df_daily.rename(columns={
616
621
  "is_last_day_of_month": "seas_last_day_of_month",
617
- "is_last_friday_of_month": "seas_last_friday_of_month"
622
+ "is_last_friday_of_month": "seas_last_friday_of_month",
623
+ "is_payday": "seas_payday"
618
624
  }, inplace=True)
619
625
 
620
-
621
626
  # ---------------------------------------------------------------------
622
627
  # 5. Weekly aggregation
623
628
  # ---------------------------------------------------------------------
624
629
 
625
630
  # Select only columns that are indicators/flags (intended for max aggregation)
626
- flag_cols = [col for col in df_daily.columns if col.startswith('seas_') or col.startswith('is_')]
631
+ flag_cols = [col for col in df_daily.columns if (col.startswith('seas_') or col.startswith('is_')) and col != "seas_payday"]
627
632
  # Ensure 'week_start' is present for grouping
628
633
  df_to_agg = df_daily[['week_start'] + flag_cols]
629
634
 
@@ -635,7 +640,26 @@ class datapull:
635
640
  .rename(columns={'week_start': "Date"})
636
641
  .set_index("Date")
637
642
  )
643
+
644
+ # Do specific aggregation for payday
645
+ # Make sure 'date' column exists in df_daily
646
+ df_daily["month"] = df_daily["Date"].dt.month
647
+ df_daily["year"] = df_daily["Date"].dt.year
648
+
649
+ # Sum of seas_payday flags per week
650
+ week_payday_sum = df_daily.groupby("week_start")["seas_payday"].sum()
638
651
 
652
+ # Divide the number of payday flags by number of paydays per month
653
+ payday_days_in_month = (
654
+ df_daily.groupby(["year", "month"])["seas_payday"].sum()
655
+ )
656
+ week_month = df_daily.groupby("week_start").first()[["month", "year"]]
657
+ week_days_in_month = week_month.apply(lambda row: payday_days_in_month.loc[(row["year"], row["month"])], axis=1)
658
+ df_weekly_flags["seas_payday"] = (week_payday_sum / week_days_in_month).fillna(0).values
659
+
660
+ # # Drop intermediate columns
661
+ # df_weekly_flags = df_weekly_flags.drop(columns=["month", "year"])
662
+
639
663
  # --- Aggregate Week Number using MODE ---
640
664
  # Define aggregation function for mode (handling potential multi-modal cases by taking the first)
641
665
  def get_mode(x):
@@ -678,7 +702,6 @@ class datapull:
678
702
  df_weekly_monthly_dummies.rename(columns={'week_start': 'Date'}, inplace=True)
679
703
  df_weekly_monthly_dummies.set_index('Date', inplace=True)
680
704
 
681
-
682
705
  # ---------------------------------------------------------------------
683
706
  # 6. Combine all weekly components
684
707
  # ---------------------------------------------------------------------
@@ -697,15 +720,15 @@ class datapull:
697
720
 
698
721
  # Ensure correct types for flag columns (int)
699
722
  for col in df_weekly_flags.columns:
700
- if col in df_combined.columns:
701
- df_combined[col] = df_combined[col].astype(int)
723
+ if col != 'seas_payday':
724
+ if col in df_combined.columns:
725
+ df_combined[col] = df_combined[col].astype(int)
702
726
 
703
727
  # Ensure correct types for month columns (float)
704
728
  for col in df_weekly_monthly_dummies.columns:
705
729
  if col in df_combined.columns:
706
730
  df_combined[col] = df_combined[col].astype(float)
707
731
 
708
-
709
732
  # ---------------------------------------------------------------------
710
733
  # 7. Create weekly dummies for Week of Year & yearly dummies from aggregated cols
711
734
  # ---------------------------------------------------------------------
@@ -737,7 +760,7 @@ class datapull:
737
760
  # Filter out columns not in the desired order list (handles case where dum_ cols are off)
738
761
  final_cols = [col for col in cols_order if col in df_combined.columns]
739
762
  df_combined = df_combined[final_cols]
740
-
763
+
741
764
  return df_combined
742
765
 
743
766
  def pull_weather(self, week_commencing, start_date, country_codes) -> pd.DataFrame:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.9.6.5
3
+ Version: 0.9.6.7
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -1,12 +1,12 @@
1
1
  imsciences/__init__.py,sha256=_HuYeLbDMTdt7GpKI4r6-d7yRPZgcAQ7yOW0-ydR2Yo,117
2
2
  imsciences/geo.py,sha256=eenng7_BP_E2WD5Wt1G_oNxQS8W3t6lycRwJ91ngysY,15808
3
3
  imsciences/mmm.py,sha256=qMh0ccOepehfCcux7EeG8cq6piSEoFEz5iiJbDBWOS4,82214
4
- imsciences/pull.py,sha256=4NGKzNmsvfzADMucR8iLGTkYDyb5wdnqphe1CzepyWw,94992
4
+ imsciences/pull.py,sha256=vABqnOFpNaFEnpc24tz1VyHrekOFFGoCsGpkevy3dyY,96362
5
5
  imsciences/unittesting.py,sha256=U177_Txg0Lqn49zYRu5bl9OVe_X7MkNJ6V_Zd6DHOsU,45656
6
6
  imsciences/vis.py,sha256=2izdHQhmWEReerRqIxhY4Ai10VjL7xoUqyWyZC7-2XI,8931
7
- imsciences-0.9.6.5.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
8
- imsciences-0.9.6.5.dist-info/METADATA,sha256=37dZ6LLY_2Vh9j06P1MUa9TNriZANXzE3CNEOpvC-BM,18846
9
- imsciences-0.9.6.5.dist-info/PKG-INFO-TomG-HP-290722,sha256=RMcthCSyWmU6IBsXGL-nYqw0RP06pzjPKK3dzOQcU-8,18846
10
- imsciences-0.9.6.5.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
11
- imsciences-0.9.6.5.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
12
- imsciences-0.9.6.5.dist-info/RECORD,,
7
+ imsciences-0.9.6.7.dist-info/LICENSE.txt,sha256=lVq2QwcExPX4Kl2DHeEkRrikuItcDB1Pr7yF7FQ8_z8,1108
8
+ imsciences-0.9.6.7.dist-info/METADATA,sha256=hcZHA2Nc1wwtaaXM92ozcCHXHlfMdIikSCgbzdL5z0U,18846
9
+ imsciences-0.9.6.7.dist-info/PKG-INFO-TomG-HP-290722,sha256=RMcthCSyWmU6IBsXGL-nYqw0RP06pzjPKK3dzOQcU-8,18846
10
+ imsciences-0.9.6.7.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
11
+ imsciences-0.9.6.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
12
+ imsciences-0.9.6.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5