imsciences 0.9.6.8__tar.gz → 0.9.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of imsciences might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: imsciences
- Version: 0.9.6.8
+ Version: 0.9.7.0
  Summary: IMS Data Processing Package
  Author: IMS
  Author-email: cam@im-sciences.com
@@ -25,6 +25,7 @@ Requires-Dist: holidays
  Requires-Dist: google-analytics-data
  Requires-Dist: geopandas
  Requires-Dist: geopy
+ Requires-Dist: workalendar

  # IMS Package Documentation

@@ -583,37 +583,32 @@ class dataprocessing:

  return pivoted_df

- def apply_lookup_table_for_columns(self, df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
+ def apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
  """
- Creates a new DataFrame column based on a look up table, possibly with multiple columns to look up on (dictionary of substrings to class mappings).
+ Creates a new DataFrame column based on a look up table, using exact matches.

  Parameters:
  df (pandas.DataFrame): The DataFrame containing the data.
- col_names (list of str): these are the columns which are used for the lookup. One column or several columns can be inputted as a list, provided there is a merged column to lookup on. If there are multiple columns to look up on then a merged column must be inputted as the key of the dictionary of format e.g. col1|col2|col3
- to_find_dict (dict): your look up table, where keys are the values being looked up, and the values are the resulting mappings.
- if_not_in_dict (str, optional): default value if no substring matches are found in the look up table dictionary. Defaults to "Other".
- new_column_name (str, optional): name of new column. Defaults to "Mapping".
+ col_names (list of str): List of column names to use for lookup. If more than one, values are merged with '|'.
+ to_find_dict (dict): Lookup dictionary with exact keys to match.
+ if_not_in_dict (str, optional): Value used if no match is found. Defaults to "Other".
+ new_column_name (str, optional): Name of new output column. Defaults to "Mapping".

  Returns:
- pandas.DataFrame: DataFrame with a new column containing the look up table results.
+ pandas.DataFrame: DataFrame with a new column containing lookup results.

  """

- # Create regex pattern with word boundaries from the dictionary
- regex_pattern = "|".join(r'\b' + re.escape(key) + r'\b' for key in to_find_dict.keys())
-
  # Preprocess DataFrame if multiple columns
  if len(col_names) > 1:
- df["Merged"] = df[col_names].astype(str).apply('|'.join, axis=1)
+ df["Merged"] = df[col_names].astype(str).agg('|'.join, axis=1)
  col_to_use = "Merged"
  else:
  col_to_use = col_names[0]

- # Extract the first match using the regex pattern
- matches = df[col_to_use].str.extract(f'({regex_pattern})', expand=False, flags=re.IGNORECASE)
-
- # Map the matches to the corresponding values in the dictionary
- df[new_column_name] = matches.str.lower().map({k.lower(): v for k, v in to_find_dict.items()}).fillna(if_not_in_dict)
-
+ # Normalize case for matching
+ lookup = {k.lower(): v for k, v in to_find_dict.items()}
+ df[new_column_name] = df[col_to_use].str.lower().map(lookup).fillna(if_not_in_dict)
+
  # Drop intermediate column if created
  if len(col_names) > 1:
  df.drop(columns=["Merged"], inplace=True)
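
The rewritten body above swaps the old word-boundary regex matching for an exact, case-insensitive dictionary lookup on the (optionally '|'-merged) column. A minimal sketch of the new behaviour, using an illustrative DataFrame and mapping that are not taken from the package:

    import pandas as pd

    # Illustrative data and lookup table; the column names and mappings are assumptions
    df = pd.DataFrame({"channel": ["TV", "Radio", "Search"],
                       "region": ["UK", "UK", "US"]})

    # With multiple lookup columns, each key must equal the '|'-merged value exactly
    to_find_dict = {"TV|UK": "ATL", "Radio|UK": "ATL"}

    merged = df[["channel", "region"]].astype(str).agg("|".join, axis=1)
    lookup = {k.lower(): v for k, v in to_find_dict.items()}
    df["Mapping"] = merged.str.lower().map(lookup).fillna("Other")
    print(df)  # the unmatched "Search|US" row falls back to "Other"

Unlike the previous regex version, a key such as "TV" on its own no longer matches "TV|UK", because substring matching has been dropped.
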
@@ -381,7 +381,7 @@ class datapull:

  ############################################################### Seasonality ##########################################################################

- def pull_seasonality(week_commencing, start_date, countries):
+ def pull_seasonality(self, week_commencing, start_date, countries):
  """
  Generates a DataFrame with weekly seasonality features.

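With self added to the signature, pull_seasonality is callable as a normal instance method of datapull. A hypothetical call, assuming datapull takes no constructor arguments and that the argument formats below are acceptable (they are illustrative, not documented in this diff):

    from imsciences import datapull  # assumed import path

    dp = datapull()
    seasonality_df = dp.pull_seasonality("mon", "2023-01-01", ["GB", "US"])
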
@@ -1197,7 +1197,7 @@ class datapull:
  def pull_weather(self, week_commencing, start_date, country_codes) -> pd.DataFrame:
  """
  Pull weather data for a given week-commencing day and one or more country codes.
-
+ Tester
  LOGIC:
  1) For non-US countries (AU, GB, DE, CA, ZA):
  - Mesonet => max_temp_f, min_temp_f -> compute mean_temp_f -> weekly average => 'avg_max_temp_f', etc.
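
The non-US branch described in this docstring derives a daily mean from the max/min temperatures and then rolls the daily values up to week-commencing averages. A rough sketch of that aggregation, with the column names and a Monday week start assumed for illustration:

    import pandas as pd

    # Fake daily weather; real data would come from Mesonet as described above
    daily = pd.DataFrame({
        "date": pd.date_range("2024-01-01", periods=14, freq="D"),
        "max_temp_f": range(40, 54),
        "min_temp_f": range(30, 44),
    })
    daily["mean_temp_f"] = (daily["max_temp_f"] + daily["min_temp_f"]) / 2

    # Weekly averages for weeks commencing Monday
    weekly = (daily.set_index("date")
                   .resample("W-MON", label="left", closed="left")
                   .mean()
                   .rename(columns=lambda c: "avg_" + c))
    print(weekly[["avg_max_temp_f", "avg_min_temp_f", "avg_mean_temp_f"]])
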
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: imsciences
- Version: 0.9.6.8
+ Version: 0.9.7.0
  Summary: IMS Data Processing Package
  Author: IMS
  Author-email: cam@im-sciences.com
@@ -25,6 +25,7 @@ Requires-Dist: holidays
  Requires-Dist: google-analytics-data
  Requires-Dist: geopandas
  Requires-Dist: geopy
+ Requires-Dist: workalendar

  # IMS Package Documentation

@@ -10,3 +10,4 @@ holidays
  google-analytics-data
  geopandas
  geopy
+ workalendar
@@ -8,7 +8,7 @@ def read_md(file_name):
  return f.read()
  return ''

- VERSION = '0.9.6.8'
+ VERSION = '0.9.7.0'
  DESCRIPTION = 'IMS Data Processing Package'
  LONG_DESCRIPTION = read_md('README.md')

@@ -24,7 +24,7 @@ setup(
  packages=find_packages(),
  install_requires=[
  "pandas", "plotly", "numpy", "fredapi", "xgboost", "scikit-learn",
- "bs4", "yfinance", "holidays", "google-analytics-data", "geopandas", "geopy"
+ "bs4", "yfinance", "holidays", "google-analytics-data", "geopandas", "geopy", "workalendar"
  ],
  keywords=['data processing', 'apis', 'data analysis', 'data visualization', 'machine learning'],
  classifiers=[
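
workalendar, newly added to install_requires here and to the Requires-Dist metadata above, supplies per-country business calendars. This diff does not show how the package calls it, so the snippet below is only a plausible usage pattern for holiday and working-day flags:

    from datetime import date
    from workalendar.europe import UnitedKingdom

    cal = UnitedKingdom()
    print(cal.holidays(2024))                      # [(date, holiday name), ...]
    print(cal.is_working_day(date(2024, 12, 25)))  # False: Christmas Day
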