imsciences 0.9.6.8__tar.gz → 0.9.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of imsciences might be problematic. Click here for more details.
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/PKG-INFO +2 -1
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/mmm.py +12 -17
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/pull.py +2 -2
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/PKG-INFO +2 -1
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/requires.txt +1 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/setup.py +2 -2
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/LICENSE.txt +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/README.md +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/__init__.py +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/geo.py +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/unittesting.py +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences/vis.py +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/PKG-INFO-TomG-HP-290722 +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.9.6.8 → imsciences-0.9.7.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: imsciences
|
|
3
|
-
Version: 0.9.6.8
|
|
3
|
+
Version: 0.9.7.0
|
|
4
4
|
Summary: IMS Data Processing Package
|
|
5
5
|
Author: IMS
|
|
6
6
|
Author-email: cam@im-sciences.com
|
|
@@ -25,6 +25,7 @@ Requires-Dist: holidays
|
|
|
25
25
|
Requires-Dist: google-analytics-data
|
|
26
26
|
Requires-Dist: geopandas
|
|
27
27
|
Requires-Dist: geopy
|
|
28
|
+
Requires-Dist: workalendar
|
|
28
29
|
|
|
29
30
|
# IMS Package Documentation
|
|
30
31
|
|
|
@@ -583,37 +583,32 @@ class dataprocessing:
|
|
|
583
583
|
|
|
584
584
|
return pivoted_df
|
|
585
585
|
|
|
586
|
-
def apply_lookup_table_for_columns(
|
|
586
|
+
def apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
|
|
587
587
|
"""
|
|
588
|
-
Creates a new DataFrame column based on a look up table,
|
|
588
|
+
Creates a new DataFrame column based on a look up table, using exact matches.
|
|
589
589
|
|
|
590
590
|
Parameters:
|
|
591
591
|
df (pandas.DataFrame): The DataFrame containing the data.
|
|
592
|
-
col_names (list of str):
|
|
593
|
-
to_find_dict (dict):
|
|
594
|
-
if_not_in_dict (str, optional):
|
|
595
|
-
new_column_name (str, optional):
|
|
592
|
+
col_names (list of str): List of column names to use for lookup. If more than one, values are merged with '|'.
|
|
593
|
+
to_find_dict (dict): Lookup dictionary with exact keys to match.
|
|
594
|
+
if_not_in_dict (str, optional): Value used if no match is found. Defaults to "Other".
|
|
595
|
+
new_column_name (str, optional): Name of new output column. Defaults to "Mapping".
|
|
596
596
|
|
|
597
597
|
Returns:
|
|
598
|
-
pandas.DataFrame: DataFrame with a new column containing
|
|
598
|
+
pandas.DataFrame: DataFrame with a new column containing lookup results.
|
|
599
599
|
"""
|
|
600
600
|
|
|
601
|
-
# Create regex pattern with word boundaries from the dictionary
|
|
602
|
-
regex_pattern = "|".join(r'\b' + re.escape(key) + r'\b' for key in to_find_dict.keys())
|
|
603
|
-
|
|
604
601
|
# Preprocess DataFrame if multiple columns
|
|
605
602
|
if len(col_names) > 1:
|
|
606
|
-
df["Merged"] = df[col_names].astype(str).
|
|
603
|
+
df["Merged"] = df[col_names].astype(str).agg('|'.join, axis=1)
|
|
607
604
|
col_to_use = "Merged"
|
|
608
605
|
else:
|
|
609
606
|
col_to_use = col_names[0]
|
|
610
607
|
|
|
611
|
-
#
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
df[new_column_name] = matches.str.lower().map({k.lower(): v for k, v in to_find_dict.items()}).fillna(if_not_in_dict)
|
|
616
|
-
|
|
608
|
+
# Normalize case for matching
|
|
609
|
+
lookup = {k.lower(): v for k, v in to_find_dict.items()}
|
|
610
|
+
df[new_column_name] = df[col_to_use].str.lower().map(lookup).fillna(if_not_in_dict)
|
|
611
|
+
|
|
617
612
|
# Drop intermediate column if created
|
|
618
613
|
if len(col_names) > 1:
|
|
619
614
|
df.drop(columns=["Merged"], inplace=True)
|
|
@@ -381,7 +381,7 @@ class datapull:
|
|
|
381
381
|
|
|
382
382
|
############################################################### Seasonality ##########################################################################
|
|
383
383
|
|
|
384
|
-
def pull_seasonality(week_commencing, start_date, countries):
|
|
384
|
+
def pull_seasonality(self, week_commencing, start_date, countries):
|
|
385
385
|
"""
|
|
386
386
|
Generates a DataFrame with weekly seasonality features.
|
|
387
387
|
|
|
@@ -1197,7 +1197,7 @@ class datapull:
|
|
|
1197
1197
|
def pull_weather(self, week_commencing, start_date, country_codes) -> pd.DataFrame:
|
|
1198
1198
|
"""
|
|
1199
1199
|
Pull weather data for a given week-commencing day and one or more country codes.
|
|
1200
|
-
|
|
1200
|
+
Tester
|
|
1201
1201
|
LOGIC:
|
|
1202
1202
|
1) For non-US countries (AU, GB, DE, CA, ZA):
|
|
1203
1203
|
- Mesonet => max_temp_f, min_temp_f -> compute mean_temp_f -> weekly average => 'avg_max_temp_f', etc.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: imsciences
|
|
3
|
-
Version: 0.9.6.8
|
|
3
|
+
Version: 0.9.7.0
|
|
4
4
|
Summary: IMS Data Processing Package
|
|
5
5
|
Author: IMS
|
|
6
6
|
Author-email: cam@im-sciences.com
|
|
@@ -25,6 +25,7 @@ Requires-Dist: holidays
|
|
|
25
25
|
Requires-Dist: google-analytics-data
|
|
26
26
|
Requires-Dist: geopandas
|
|
27
27
|
Requires-Dist: geopy
|
|
28
|
+
Requires-Dist: workalendar
|
|
28
29
|
|
|
29
30
|
# IMS Package Documentation
|
|
30
31
|
|
|
@@ -8,7 +8,7 @@ def read_md(file_name):
|
|
|
8
8
|
return f.read()
|
|
9
9
|
return ''
|
|
10
10
|
|
|
11
|
-
VERSION = '0.9.6.8'
|
|
11
|
+
VERSION = '0.9.7.0'
|
|
12
12
|
DESCRIPTION = 'IMS Data Processing Package'
|
|
13
13
|
LONG_DESCRIPTION = read_md('README.md')
|
|
14
14
|
|
|
@@ -24,7 +24,7 @@ setup(
|
|
|
24
24
|
packages=find_packages(),
|
|
25
25
|
install_requires=[
|
|
26
26
|
"pandas", "plotly", "numpy", "fredapi", "xgboost", "scikit-learn",
|
|
27
|
-
"bs4", "yfinance", "holidays", "google-analytics-data", "geopandas", "geopy"
|
|
27
|
+
"bs4", "yfinance", "holidays", "google-analytics-data", "geopandas", "geopy", "workalendar"
|
|
28
28
|
],
|
|
29
29
|
keywords=['data processing', 'apis', 'data analysis', 'data visualization', 'machine learning'],
|
|
30
30
|
classifiers=[
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|