forecastutils 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forecastutils-0.1.0/PKG-INFO +15 -0
- forecastutils-0.1.0/forecastutils.egg-info/PKG-INFO +15 -0
- forecastutils-0.1.0/forecastutils.egg-info/SOURCES.txt +8 -0
- forecastutils-0.1.0/forecastutils.egg-info/dependency_links.txt +1 -0
- forecastutils-0.1.0/forecastutils.egg-info/requires.txt +9 -0
- forecastutils-0.1.0/forecastutils.egg-info/top_level.txt +1 -0
- forecastutils-0.1.0/setup.cfg +4 -0
- forecastutils-0.1.0/setup.py +17 -0
- forecastutils-0.1.0/test/test_core.py +43 -0
- forecastutils-0.1.0/test/test_preprocessing.py +101 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forecastutils
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Custom utilities for prediction pipeline
|
|
5
|
+
Requires-Dist: pytest
|
|
6
|
+
Requires-Dist: scikit-learn
|
|
7
|
+
Requires-Dist: xgboost
|
|
8
|
+
Requires-Dist: pandas
|
|
9
|
+
Requires-Dist: numpy
|
|
10
|
+
Requires-Dist: holidays
|
|
11
|
+
Requires-Dist: vacances_scolaires_france
|
|
12
|
+
Requires-Dist: matplotlib
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Dynamic: requires-dist
|
|
15
|
+
Dynamic: summary
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forecastutils
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Custom utilities for prediction pipeline
|
|
5
|
+
Requires-Dist: pytest
|
|
6
|
+
Requires-Dist: scikit-learn
|
|
7
|
+
Requires-Dist: xgboost
|
|
8
|
+
Requires-Dist: pandas
|
|
9
|
+
Requires-Dist: numpy
|
|
10
|
+
Requires-Dist: holidays
|
|
11
|
+
Requires-Dist: vacances_scolaires_france
|
|
12
|
+
Requires-Dist: matplotlib
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Dynamic: requires-dist
|
|
15
|
+
Dynamic: summary
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Setup script for the forecastutils package.

Install requirements are read from the adjacent requirements.txt file so the
dependency list lives in a single place.
"""
from setuptools import setup, find_packages
import pathlib

# Read requirements.txt, skipping blank lines and comment lines so they do
# not end up as invalid entries in install_requires (raw splitlines() would
# pass them through verbatim).
this_dir = pathlib.Path(__file__).parent
requirements_path = this_dir / "requirements.txt"
with open(requirements_path, "r", encoding="utf-8") as f:
    requirements = [
        line.strip()
        for line in f.read().splitlines()
        if line.strip() and not line.strip().startswith("#")
    ]

setup(
    name="forecastutils",
    version="0.1.0",
    packages=find_packages(),
    install_requires=requirements,
    include_package_data=True,
    description="Custom utilities for prediction pipeline",
)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from sklearn.pipeline import Pipeline
|
|
5
|
+
from sklearn.linear_model import Ridge
|
|
6
|
+
from sklearn.preprocessing import StandardScaler
|
|
7
|
+
import forecastutils.core as fcore
|
|
8
|
+
|
|
9
|
+
def test_build_model_forecast():
    """build_model_forecast must return, per pipeline name, the fitted best
    model, its cross-validation score (float) and its best parameters (dict).
    """
    # Synthetic near-linear data with a little Gaussian noise.
    n_samples = 100
    features = pd.DataFrame({'feature': np.arange(n_samples)})
    target = np.arange(n_samples) + np.random.normal(0, 1, n_samples)

    # Minimal scale-then-ridge pipeline.
    model = Pipeline([
        ('scaler', StandardScaler()),
        ('regressor', Ridge()),
    ])

    # Hyper-parameter grid keyed by the generated pipeline name.
    grids = {'pipeline_0': {'regressor__alpha': [0.1, 1.0, 10.0]}}

    # Run the grid-searched cross-validated fit.
    results = fcore.build_model_forecast(
        pipelines=[model],
        param_grids=grids,
        X=features,
        y=target,
        scoring='neg_mean_absolute_error',
        n_splits=5,
        verbose=0,
    )

    # The result for each pipeline exposes model, score and parameters.
    assert 'pipeline_0' in results, "Le nom du pipeline n'est pas dans les résultats"
    assert 'best_model' in results['pipeline_0'], "Le modèle optimal n'est pas présent"
    assert isinstance(results['pipeline_0']['best_score_cv'], float), "Le score CV doit être un float"
    assert isinstance(results['pipeline_0']['best_params'], dict), "Les paramètres optimaux doivent être un dictionnaire"
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
import forecastutils.preprocessing as pp
|
|
5
|
+
|
|
6
|
+
def test_month_binary_encoder():
    """MonthBinaryEncoder adds one boolean is_<month> column per month and
    flags exactly the month of each row's date.
    """
    # Test data: one date each in January, June and December.
    df = pd.DataFrame({
        'date': ['2023-01-15', '2023-06-10', '2023-12-25']
    })

    encoder = pp.MonthBinaryEncoder(date_column='date')
    transformed_df = encoder.fit_transform(df)

    # The expected indicator columns are present.
    assert 'is_janvier' in transformed_df.columns
    assert 'is_mai' in transformed_df.columns
    assert 'is_decembre' in transformed_df.columns
    # Each row flags its own month only (truthiness instead of `== True`,
    # per flake8 E712).
    assert transformed_df.loc[0, 'is_janvier']
    assert not transformed_df.loc[0, 'is_decembre']
    assert transformed_df.loc[1, 'is_juin']
    assert transformed_df.loc[2, 'is_decembre']

    # Exactly one active month indicator per row. Hoist the column scan out
    # of the loop and iterate index labels rather than range(len(...)),
    # which mixed positional counting with label-based .loc access.
    month_cols = [col for col in transformed_df.columns if col.startswith('is_')]
    for idx in transformed_df.index:
        active = [col for col in month_cols if transformed_df.loc[idx, col]]
        assert len(active) == 1
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_monthly_sum_lag_transformer():
    """MonthlySumLagTransformer adds the previous month's total as a lag
    column and drops its intermediate helper columns.
    """
    # 60 daily rows of 1.0 spanning January and February 2023.
    data = pd.DataFrame({
        'date': pd.date_range(start='2023-01-01', periods=60, freq='D'),
        'value': np.ones(60),
    })

    # One-month lag, leaving NaNs in place (fillna=False).
    transformer = pp.MonthlySumLagTransformer(date_col='date', value_col='value', lags=[1], fillna=False)
    transformed = transformer.transform(data)

    # Only the lag column should survive; helper columns must be dropped.
    assert 'value_monthly_sum_lag1' in transformed.columns
    assert 'month' not in transformed.columns
    assert 'value_monthly_sum' not in transformed.columns

    # Reference total computed directly from the input (the original also
    # computed February's total but never used it — dead code removed).
    jan_sum = data.loc[data['date'].dt.month == 1, 'value'].sum()

    # February rows must carry January's total.
    feb_dates = transformed[transformed['date'].dt.month == 2]
    assert (feb_dates['value_monthly_sum_lag1'] == jan_sum).all()

    # January rows have no prior month, hence NaN.
    jan_dates = transformed[transformed['date'].dt.month == 1]
    assert jan_dates['value_monthly_sum_lag1'].isna().all()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_moving_average_delta_transformer():
    """MovingAverageDeltaTransformer appends a delta_ma_<a>_<b> column
    holding the difference between the two rolling means.
    """
    # Strictly increasing series so the moving-average delta is predictable.
    frame = pd.DataFrame({
        'date': pd.date_range(start='2023-01-01', periods=30, freq='D'),
        'value': range(30),
    })

    # Windows 3 and 5 -> column delta_ma_3_5.
    delta_tf = pp.MovingAverageDeltaTransformer(date_column='date', value_column='value', window_pairs=[(3, 5)])
    transformed = delta_tf.transform(frame)

    # Column added, row count preserved, value at the first fully-defined
    # position correct, and at least one non-null delta overall.
    assert 'delta_ma_3_5' in transformed.columns, "La colonne delta n'a pas été ajoutée"
    assert len(transformed) == len(frame), "La taille du DataFrame transformé est incorrecte"
    assert transformed['delta_ma_3_5'][3] == 0.5, "Le calcul du delta de moving average est faux"
    assert not transformed['delta_ma_3_5'].isnull().all(), "Toutes les valeurs delta sont nulles"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_remaining_weekdays_excluding_today():
    """RemainingWeekdaysInMonthTransformer counts, per weekday name, how many
    occurrences are left in the month strictly after the row's date.
    """
    # Wednesday 2023-10-25; the remaining October days are the 26th-31st.
    frame = pd.DataFrame({
        'date': pd.to_datetime(['2023-10-25']),  # a Wednesday
        'value': [42],
    })

    transformer = pp.RemainingWeekdaysInMonthTransformer(date_column='date')
    transformed_df = transformer.transform(frame)

    # Oct 26 Thu, 27 Fri, 28 Sat, 29 Sun, 30 Mon, 31 Tue; the 25th itself is
    # excluded, so no Wednesday remains.
    expected_counts = {
        'Monday': 1,     # the 30th
        'Tuesday': 1,    # the 31st
        'Wednesday': 0,  # the 25th is excluded; no other Wednesday
        'Thursday': 1,   # the 26th
        'Friday': 1,     # the 27th
        'Saturday': 1,   # the 28th
        'Sunday': 1,     # the 29th
    }

    for day, expected in expected_counts.items():
        actual = transformed_df.loc[0, day]
        assert actual == expected, f"Erreur pour {day}: attendu {expected}, obtenu {actual}"
|