forecastutils 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: forecastutils
3
+ Version: 0.1.0
4
+ Summary: Custom utilities for prediction pipeline
5
+ Requires-Dist: pytest
6
+ Requires-Dist: scikit-learn
7
+ Requires-Dist: xgboost
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: holidays
11
+ Requires-Dist: vacances_scolaires_france
12
+ Requires-Dist: matplotlib
13
+ Requires-Dist: requests
14
+ Dynamic: requires-dist
15
+ Dynamic: summary
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: forecastutils
3
+ Version: 0.1.0
4
+ Summary: Custom utilities for prediction pipeline
5
+ Requires-Dist: pytest
6
+ Requires-Dist: scikit-learn
7
+ Requires-Dist: xgboost
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: holidays
11
+ Requires-Dist: vacances_scolaires_france
12
+ Requires-Dist: matplotlib
13
+ Requires-Dist: requests
14
+ Dynamic: requires-dist
15
+ Dynamic: summary
@@ -0,0 +1,8 @@
1
+ setup.py
2
+ forecastutils.egg-info/PKG-INFO
3
+ forecastutils.egg-info/SOURCES.txt
4
+ forecastutils.egg-info/dependency_links.txt
5
+ forecastutils.egg-info/requires.txt
6
+ forecastutils.egg-info/top_level.txt
7
+ test/test_core.py
8
+ test/test_preprocessing.py
@@ -0,0 +1,9 @@
1
+ pytest
2
+ scikit-learn
3
+ xgboost
4
+ pandas
5
+ numpy
6
+ holidays
7
+ vacances_scolaires_france
8
+ matplotlib
9
+ requests
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,17 @@
1
from setuptools import setup, find_packages
import pathlib

# Read install requirements from the adjacent requirements.txt so the
# dependency list is maintained in a single place.
this_dir = pathlib.Path(__file__).parent
requirements_path = this_dir / "requirements.txt"

# Use an explicit encoding (platform default may differ), and skip blank
# lines and '#' comments: both are valid in a requirements.txt but are
# not valid entries for setuptools' install_requires.
requirements = [
    line.strip()
    for line in requirements_path.read_text(encoding="utf-8").splitlines()
    if line.strip() and not line.strip().startswith("#")
]

setup(
    name="forecastutils",
    version="0.1.0",
    packages=find_packages(),
    install_requires=requirements,
    include_package_data=True,
    description="Custom utilities for prediction pipeline",
)
@@ -0,0 +1,43 @@
1
+ import pytest
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.pipeline import Pipeline
5
+ from sklearn.linear_model import Ridge
6
+ from sklearn.preprocessing import StandardScaler
7
+ import forecastutils.core as fcore
8
+
9
def test_build_model_forecast():
    """build_model_forecast must return, per pipeline name, the fitted
    best model together with its cross-validation score and parameters."""
    # Synthetic regression data: a noisy linear trend.
    sample_count = 100
    features = pd.DataFrame({'feature': np.arange(sample_count)})
    target = np.arange(sample_count) + np.random.normal(0, 1, sample_count)

    # Minimal two-step pipeline: scaling followed by a Ridge regressor.
    model_pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('regressor', Ridge()),
    ])

    # Hyper-parameter grid, keyed by the generated pipeline name.
    grids = {
        'pipeline_0': {
            'regressor__alpha': [0.1, 1.0, 10.0],
        },
    }

    # Run the grid-search helper under test.
    results = fcore.build_model_forecast(
        pipelines=[model_pipeline],
        param_grids=grids,
        X=features,
        y=target,
        scoring='neg_mean_absolute_error',
        n_splits=5,
        verbose=0,
    )

    # The result entry must expose the model, its CV score and its params.
    assert 'pipeline_0' in results, "Le nom du pipeline n'est pas dans les résultats"
    assert 'best_model' in results['pipeline_0'], "Le modèle optimal n'est pas présent"
    assert isinstance(results['pipeline_0']['best_score_cv'], float), "Le score CV doit être un float"
    assert isinstance(results['pipeline_0']['best_params'], dict), "Les paramètres optimaux doivent être un dictionnaire"
@@ -0,0 +1,101 @@
1
+ import pytest
2
+ import pandas as pd
3
+ import numpy as np
4
+ import forecastutils.preprocessing as pp
5
+
6
def test_month_binary_encoder():
    """MonthBinaryEncoder must add boolean is_<month> flags (French month
    names) and mark exactly the month of each row's date."""
    # Three dates covering January, June and December.
    sample = pd.DataFrame({
        'date': ['2023-01-15', '2023-06-10', '2023-12-25']
    })

    encoder = pp.MonthBinaryEncoder(date_column='date')
    encoded = encoder.fit_transform(sample)

    # Expected month columns are present.
    # NOTE(review): 'is_mai' is asserted although no May date appears in the
    # fixture — presumably the encoder emits a column for every month; confirm.
    assert 'is_janvier' in encoded.columns
    assert 'is_mai' in encoded.columns
    assert 'is_decembre' in encoded.columns
    assert encoded.loc[0, 'is_janvier'] == True
    assert encoded.loc[0, 'is_decembre'] == False
    assert encoded.loc[1, 'is_juin'] == True
    assert encoded.loc[2, 'is_decembre'] == True

    # Exactly one month flag may be active on any given row.
    for row in range(len(encoded)):
        active_months = [
            col for col in encoded.columns
            if col.startswith('is_') and encoded.loc[row, col]
        ]
        assert len(active_months) == 1
29
+
30
+
31
def test_monthly_sum_lag_transformer():
    """MonthlySumLagTransformer must attach the previous month's total as a
    lag column without leaking its intermediate helper columns."""
    # 60 daily rows of value 1.0, spanning January and February 2023.
    frame = pd.DataFrame({
        'date': pd.date_range(start='2023-01-01', periods=60, freq='D'),
        'value': np.ones(60),
    })

    # One-month lag; NaNs are kept where no prior month exists.
    transformer = pp.MonthlySumLagTransformer(
        date_col='date', value_col='value', lags=[1], fillna=False
    )
    result = transformer.transform(frame)

    # Only the lag column is added; helper columns must have been dropped.
    assert 'value_monthly_sum_lag1' in result.columns
    assert 'month' not in result.columns
    assert 'value_monthly_sum' not in result.columns

    # Reference total computed straight from the input data.
    january_total = frame[frame['date'].dt.month == 1]['value'].sum()

    # Every February row carries January's total.
    february_rows = result[result['date'].dt.month == 2]
    assert all(february_rows['value_monthly_sum_lag1'] == january_total)

    # January rows have no prior month, so the lag stays NaN.
    january_rows = result[result['date'].dt.month == 1]
    assert january_rows['value_monthly_sum_lag1'].isna().all()
58
+
59
+
60
def test_moving_average_delta_transformer():
    """MovingAverageDeltaTransformer must add a delta_ma_<a>_<b> column
    holding the difference between the two moving averages."""
    # 30 daily rows with a simple increasing series 0..29.
    frame = pd.DataFrame({
        'date': pd.date_range(start='2023-01-01', periods=30, freq='D'),
        'value': range(30),
    })

    delta_transformer = pp.MovingAverageDeltaTransformer(
        date_column='date', value_column='value', window_pairs=[(3, 5)]
    )
    result = delta_transformer.transform(frame)

    assert 'delta_ma_3_5' in result.columns, "La colonne delta n'a pas été ajoutée"
    assert len(result) == len(frame), "La taille du DataFrame transformé est incorrecte"
    # For a linear series, MA(3) - MA(5) is expected to be 0.5 at index 3
    # per the transformer's window alignment — TODO confirm min_periods.
    assert result['delta_ma_3_5'][3] == 0.5, "Le calcul du delta de moving average est faux"
    assert not result['delta_ma_3_5'].isnull().all(), "Toutes les valeurs delta sont nulles"
78
+
79
+
80
def test_remaining_weekdays_excluding_today():
    """RemainingWeekdaysInMonthTransformer must count, per weekday, how many
    occurrences remain in the month strictly after the row's own date."""
    # Wednesday 2023-10-25; October 2023 ends on Tuesday the 31st.
    frame = pd.DataFrame({
        'date': pd.to_datetime(['2023-10-25']),  # Wednesday
        'value': [42],
    })

    transformer = pp.RemainingWeekdaysInMonthTransformer(date_column='date')
    result = transformer.transform(frame)

    # Remaining dates are the 26th through the 31st: one of each weekday
    # except Wednesday (the 25th itself is excluded).
    expected_counts = {
        'Monday': 1,     # the 30th
        'Tuesday': 1,    # the 31st
        'Wednesday': 0,  # the 25th is excluded; no later Wednesday
        'Thursday': 1,   # the 26th
        'Friday': 1,     # the 27th
        'Saturday': 1,   # the 28th
        'Sunday': 1,     # the 29th
    }

    for day, expected in expected_counts.items():
        actual = result.loc[0, day]
        assert actual == expected, f"Erreur pour {day}: attendu {expected}, obtenu {actual}"