lecrapaud 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic; see the registry's advisory for details.

@@ -60,9 +60,9 @@ class FeatureSelection(Base):
     )
     training_time = Column(Integer)
     best_features_path = Column(String(255))
-    dataset_id = Column(
+    experiment_id = Column(
         BigInteger,
-        ForeignKey("lecrapaud_datasets.id", ondelete="CASCADE"),
+        ForeignKey("lecrapaud_experiments.id", ondelete="CASCADE"),
         nullable=False,
     )
     target_id = Column(
@@ -71,8 +71,8 @@ class FeatureSelection(Base):
         nullable=False,
     )
 
-    dataset = relationship(
-        "Dataset", back_populates="feature_selections", lazy="selectin"
+    experiment = relationship(
+        "Experiment", back_populates="feature_selections", lazy="selectin"
     )
     target = relationship(
         "Target", back_populates="feature_selections", lazy="selectin"
@@ -92,7 +92,7 @@ class FeatureSelection(Base):
 
     __table_args__ = (
         UniqueConstraint(
-            "dataset_id", "target_id", name="uq_feature_selection_composite"
+            "experiment_id", "target_id", name="uq_feature_selection_composite"
         ),
     )
 
@@ -43,9 +43,9 @@ class ModelSelection(Base):
         ForeignKey("lecrapaud_targets.id", ondelete="CASCADE"),
         nullable=False,
     )
-    dataset_id = Column(
+    experiment_id = Column(
         BigInteger,
-        ForeignKey("lecrapaud_datasets.id", ondelete="CASCADE"),
+        ForeignKey("lecrapaud_experiments.id", ondelete="CASCADE"),
         nullable=False,
     )
 
@@ -56,13 +56,13 @@ class ModelSelection(Base):
         cascade="all, delete-orphan",
         lazy="selectin",
     )
-    dataset = relationship(
-        "Dataset", back_populates="model_selections", lazy="selectin"
+    experiment = relationship(
+        "Experiment", back_populates="model_selections", lazy="selectin"
    )
     target = relationship("Target", back_populates="model_selections", lazy="selectin")
 
     __table_args__ = (
         UniqueConstraint(
-            "target_id", "dataset_id", name="uq_model_selection_composite"
+            "target_id", "experiment_id", name="uq_model_selection_composite"
         ),
     )
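Both FeatureSelection and ModelSelection swap their dataset foreign key, relationship, and composite unique constraint over to Experiment, so an existing 0.5.1 database needs a schema migration that this diff does not include. A minimal sketch of what such a migration could look like for the feature-selection table, assuming Alembic and MySQL; every name not visible in the hunks above (the table name, the new FK name) is a guess:

    # Hypothetical Alembic migration (not shipped in this diff). Names other
    # than "uq_feature_selection_composite" and "lecrapaud_experiments" are
    # guesses based on the naming pattern above.
    from alembic import op
    import sqlalchemy as sa


    def upgrade():
        # The old FK to lecrapaud_datasets.id must be dropped first; its
        # auto-generated name is installation-specific and omitted here.
        op.drop_constraint(
            "uq_feature_selection_composite",
            "lecrapaud_feature_selections",
            type_="unique",
        )
        op.alter_column(
            "lecrapaud_feature_selections",
            "dataset_id",
            new_column_name="experiment_id",
            existing_type=sa.BigInteger(),
            existing_nullable=False,
        )
        op.create_foreign_key(
            "fk_feature_selections_experiment_id",  # hypothetical name
            "lecrapaud_feature_selections",
            "lecrapaud_experiments",
            ["experiment_id"],
            ["id"],
            ondelete="CASCADE",
        )
        op.create_unique_constraint(
            "uq_feature_selection_composite",
            "lecrapaud_feature_selections",
            ["experiment_id", "target_id"],
        )

The same three steps (drop constraint, rename column, recreate FK and constraint) would apply to the model-selection table.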
@@ -6,6 +6,7 @@ from sqlalchemy import (
     ForeignKey,
     BigInteger,
     TIMESTAMP,
+    JSON,
 )
 from sqlalchemy import func
 from sqlalchemy.orm import relationship
@@ -43,9 +44,10 @@ class Score(Base):
     recall = Column(Float)
     f1 = Column(Float)
     roc_auc = Column(Float)
-    threshold = Column(Float)
+    thresholds = Column(JSON)
     precision_at_threshold = Column(Float)
     recall_at_threshold = Column(Float)
+    f1_at_threshold = Column(Float)
     model_training_id = Column(
         BigInteger,
         ForeignKey("lecrapaud_model_trainings.id", ondelete="CASCADE"),
@@ -19,7 +19,7 @@ from sqlalchemy.orm import relationship, Mapped, mapped_column, DeclarativeBase
 
 from lecrapaud.db.session import get_db
 from lecrapaud.db.models.base import Base
-from lecrapaud.db.models.dataset import lecrapaud_dataset_target_association
+from lecrapaud.db.models.experiment import lecrapaud_experiment_target_association
 
 
 class Target(Base):
@@ -38,9 +38,9 @@ class Target(Base):
     type = Column(String(50), nullable=False)
     description = Column(String(255))
 
-    datasets = relationship(
-        "Dataset",
-        secondary=lecrapaud_dataset_target_association,
+    experiments = relationship(
+        "Experiment",
+        secondary=lecrapaud_experiment_target_association,
         back_populates="targets",
         lazy="selectin",
     )
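Target's many-to-many is renamed from datasets to experiments, with the association table renamed to match, so 0.5.x callers need a one-word change. A hedged usage sketch; Target.get_all(db=...) and get_db appear elsewhere in this diff, the rest is illustrative:

    # Illustrative only; assumes models as defined in this diff.
    from lecrapaud.db import Target
    from lecrapaud.db.session import get_db

    with get_db() as db:
        for target in Target.get_all(db=db):
            # 0.5.x code read target.datasets; 0.7.0 renames the
            # many-to-many to target.experiments.
            print(target.name, [e.name for e in target.experiments])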
lecrapaud/db/session.py CHANGED
@@ -14,7 +14,7 @@ _engine = None
 _SessionLocal = None
 if DB_URI:
     DATABASE_URL = DB_URI
-elif DB_USER and DB_PASSWORD and DB_HOST and DB_PORT and DB_NAME:
+elif DB_USER:
     DATABASE_URL = (
         f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
     )
@@ -23,11 +23,10 @@ else:
 
 
 def init_db(uri: str = None):
-    print(f"Initializing database with URI: {uri}")
     global _engine, _SessionLocal, DATABASE_URL
     if uri:
         DATABASE_URL = uri
-    elif DB_USER and DB_PASSWORD and DB_HOST and DB_PORT and DB_NAME:
+    elif DB_USER:
         DATABASE_URL = (
             f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
         )
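Note that both guards relax from requiring all five connection variables to requiring only DB_USER. With a partially configured environment, the f-string now interpolates the literal string None for the missing parts instead of falling through to the error path. A sketch of a stricter check, illustrative only (variable names match the diff):

    # Sketch: validate all components before building the URL, roughly
    # the 0.5.1 behavior plus an explicit partial-config error. Not the
    # package's code.
    import os

    DB_USER = os.getenv("DB_USER")
    DB_PASSWORD = os.getenv("DB_PASSWORD")
    DB_HOST = os.getenv("DB_HOST")
    DB_PORT = os.getenv("DB_PORT")
    DB_NAME = os.getenv("DB_NAME")

    parts = [DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME]
    if all(parts):
        DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    elif any(parts):
        raise RuntimeError(
            "Partial DB configuration: set all of DB_USER, DB_PASSWORD, "
            "DB_HOST, DB_PORT, DB_NAME"
        )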
@@ -39,6 +38,7 @@ def init_db(uri: str = None):
             "DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME or DB_URI, "
             "or provide a `uri` argument to LeCrapaud"
         )
+    print(f"Initializing database with URI: {DATABASE_URL}")
 
     # Use urlparse for robust parsing
     parsed = urlparse(DATABASE_URL)
@@ -77,7 +77,7 @@ def init_db(uri: str = None):
 @contextmanager
 def get_db():
     if _SessionLocal is None:
-        raise RuntimeError("Database not initialized. Call `init_db()` first.")
+        init_db()
     db = _SessionLocal()
     try:
         yield db
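get_db() now lazily initializes the session factory instead of raising. The pattern in isolation, as a runnable sketch rather than the package's exact code:

    # Minimal sketch of the lazy-initialization pattern 0.7.0 adopts.
    from contextlib import contextmanager
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    _SessionLocal = None

    def init_db(uri="sqlite:///:memory:"):  # illustrative default URI
        global _SessionLocal
        _SessionLocal = sessionmaker(bind=create_engine(uri))

    @contextmanager
    def get_db():
        if _SessionLocal is None:
            init_db()  # first caller pays the setup cost
        db = _SessionLocal()
        try:
            yield db
        finally:
            db.close()

The trade-off: callers no longer need an explicit init_db(), but configuration errors surface at the first query instead of at startup.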
lecrapaud/directories.py CHANGED
@@ -3,11 +3,9 @@ from pathlib import Path
 
 cwd = os.getcwd()
 tmp_dir = cwd + "/tmp"
-data_dir = tmp_dir + "/data"
 logger_dir = cwd + "/log"
 
 os.makedirs(tmp_dir, exist_ok=True)
-os.makedirs(data_dir, exist_ok=True)
 os.makedirs(logger_dir, exist_ok=True)
 
 
lecrapaud/experiment.py CHANGED
@@ -1,27 +1,25 @@
-import pandas as pd
 import os
 from pathlib import Path
 
+import pandas as pd
+
+# Set up coverage file path
 os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
 
-# Internal
+# Internal imports
 from lecrapaud.directories import tmp_dir
-from lecrapaud.utils import logger
-from lecrapaud.config import PYTHON_ENV
-from lecrapaud.db import (
-    Dataset,
-    Target,
-)
+from lecrapaud.db import Experiment, Target
 from lecrapaud.db.session import get_db
 
 
-def create_dataset(
+def create_experiment(
     data: pd.DataFrame,
     corr_threshold,
     percentile,
     max_features,
     date_column,
     group_column,
+    experiment_name,
     **kwargs,
 ):
     dates = {}
@@ -37,20 +35,20 @@ def create_dataset(
     with get_db() as db:
         all_targets = Target.get_all(db=db)
         targets = [target for target in all_targets if target.name in data.columns]
-        dataset_name = f"data_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"
+        experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"
 
-        dataset_dir = f"{tmp_dir}/{dataset_name}"
-        preprocessing_dir = f"{dataset_dir}/preprocessing"
-        data_dir = f"{dataset_dir}/data"
-        os.makedirs(dataset_dir, exist_ok=True)
+        experiment_dir = f"{tmp_dir}/{experiment_name}"
+        preprocessing_dir = f"{experiment_dir}/preprocessing"
+        data_dir = f"{experiment_dir}/data"
+        os.makedirs(experiment_dir, exist_ok=True)
         os.makedirs(preprocessing_dir, exist_ok=True)
         os.makedirs(data_dir, exist_ok=True)
 
-        dataset = Dataset.upsert(
+        experiment = Experiment.upsert(
             match_fields=["name"],
             db=db,
-            name=dataset_name,
-            path=Path(dataset_dir).resolve(),
+            name=experiment_name,
+            path=Path(experiment_dir).resolve(),
             type="training",
             size=data.shape[0],
             corr_threshold=corr_threshold,
@@ -59,6 +57,15 @@
             **groups,
             **dates,
             targets=targets,
+            context={
+                "corr_threshold": corr_threshold,
+                "percentile": percentile,
+                "max_features": max_features,
+                "date_column": date_column,
+                "group_column": group_column,
+                "experiment_name": experiment_name,
+                **kwargs,
+            },
         )
 
-    return dataset
+    return experiment
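The public entry point is renamed from create_dataset to create_experiment, takes a new experiment_name parameter that is prefixed onto the generated directory name, and persists its full configuration in a context JSON field. A call sketch inferred from the new signature; argument values and the input file are illustrative:

    # Call shape inferred from the signature above; values are made up.
    import pandas as pd
    from lecrapaud.experiment import create_experiment

    data = pd.read_csv("training_data.csv")  # hypothetical input
    experiment = create_experiment(
        data=data,
        corr_threshold=0.9,
        percentile=20,
        max_features=50,
        date_column="DATE",
        group_column=None,
        experiment_name="my_experiment",
    )
    print(experiment.name, experiment.path)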
@@ -61,7 +61,7 @@ from lecrapaud.integrations.openai_integration import (
 )
 from lecrapaud.feature_selection import get_features_by_types
 from lecrapaud.utils import logger
-from lecrapaud.db import Target, Feature, Dataset
+from lecrapaud.db import Target, Feature, Experiment
 from lecrapaud.config import PYTHON_ENV
 
 
@@ -308,7 +308,7 @@ class PreprocessFeature:
     def __init__(
         self,
         data: pd.DataFrame,
-        dataset,
+        experiment,
         time_series: bool = False,
         date_column: str | None = None,
         group_column: str | None = None,
@@ -326,7 +326,7 @@ class PreprocessFeature:
         self.data = data
         self.data.columns = self.data.columns.str.upper()
 
-        self.dataset = dataset
+        self.experiment = experiment
         self.columns_pca = columns_pca
         self.columns_onehot = columns_onehot
         self.columns_binary = columns_binary
@@ -341,10 +341,10 @@ class PreprocessFeature:
         self.val_size = val_size
         self.test_size = test_size
 
-        self.dataset_dir = self.dataset.path
-        self.dataset_id = self.dataset.id
-        self.data_dir = f"{self.dataset_dir}/data"
-        self.preprocessing_dir = f"{self.dataset_dir}/preprocessing"
+        self.experiment_dir = self.experiment.path
+        self.experiment_id = self.experiment.id
+        self.data_dir = f"{self.experiment_dir}/data"
+        self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
 
     def run(self):
         # Split
@@ -358,8 +358,8 @@ class PreprocessFeature:
 
         # PCA
         train, pcas = self.add_pca_features(train)
-        val, _ = self.add_pca_features(test, pcas=pcas)
-        test, _ = self.add_pca_features(val, pcas=pcas)
+        val, _ = self.add_pca_features(val, pcas=pcas)
+        test, _ = self.add_pca_features(test, pcas=pcas)
 
         joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
 
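This hunk fixes a genuine 0.5.1 bug: the val and test frames were swapped, so each received the other's PCA projection. The corrected discipline, fit on train and transform-only elsewhere, in a standalone scikit-learn sketch (random data, not the package's add_pca_features):

    # Fit-on-train / transform-elsewhere, the invariant the fix restores.
    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    train_X = rng.normal(size=(80, 10))
    val_X = rng.normal(size=(10, 10))
    test_X = rng.normal(size=(10, 10))

    pca = PCA(n_components=5).fit(train_X)  # components learned from train only
    val_pcs = pca.transform(val_X)          # val projected with train's components
    test_pcs = pca.transform(test_X)        # test projected with train's components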
@@ -377,7 +377,7 @@ class PreprocessFeature:
         joblib.dump(self.data, f"{self.data_dir}/full.pkl")
         joblib.dump(transformer, f"{self.preprocessing_dir}/column_transformer.pkl")
         summary = summarize_dataframe(train)
-        summary.to_csv(f"{self.dataset_dir}/feature_summary.csv", index=False)
+        summary.to_csv(f"{self.experiment_dir}/feature_summary.csv", index=False)
 
         return train, val, test
 
@@ -431,9 +431,9 @@ class PreprocessFeature:
             f"{data.shape} {name} data from {dates[f"{name}_start_date"].strftime('%d/%m/%Y')} to {dates[f"{name}_end_date"].strftime('%d/%m/%Y')}"
         )
 
-        Dataset.update(
+        Experiment.upsert(
             match_fields=["id"],
-            id=self.dataset_id,
+            id=self.experiment_id,
             train_size=len(train),
             val_size=len(val),
             test_size=len(test),
@@ -454,7 +454,7 @@ class PreprocessFeature:
         Splits a DataFrame into train, validation, and test sets.
 
         Parameters:
-            df (pd.DataFrame): The full dataset
+            df (pd.DataFrame): The full experiment
             val_size (float): Proportion of validation set (default 0.1)
             test_size (float): Proportion of test set (default 0.1)
             random_state (int): Random seed for reproducibility
@@ -646,23 +646,52 @@ class PreprocessFeature:
 
         # Insert features in db
         if save_in_db:
-            # TODO: in bulk
+            # Get feature types from transformed data
             categorical_features, numerical_features = get_features_by_types(
                 X_transformed
             )
-            for feature in categorical_features:
-                Feature.upsert(match_fields=["name"], name=feature, type="categorical")
-            for feature in numerical_features:
-                Feature.upsert(match_fields=["name"], name=feature, type="numerical")
-            for target in y.columns:
-                target_number = int(target.split("_")[1])
-                type = (
+
+            # Get column names from DataFrames
+            cat_feature_names = categorical_features.columns.tolist()
+            num_feature_names = numerical_features.columns.tolist()
+
+            # Combine all feature names and their types
+            all_feature_names = cat_feature_names + num_feature_names
+            all_feature_types = ["categorical"] * len(cat_feature_names) + [
+                "numerical"
+            ] * len(num_feature_names)
+
+            # Upsert features in bulk if we have any features
+            if all_feature_names:
+                Feature.upsert_bulk(
+                    match_fields=["name"],
+                    name=all_feature_names,
+                    type=all_feature_types,
+                )
+
+            # Upsert targets in bulk
+            target_names = y.columns.tolist()
+            target_types = [
+                (
                     "classification"
-                    if target_number in self.target_clf
+                    if int(target.split("_")[1]) in self.target_clf
                     else "regression"
                 )
-                # TODO: what about description here ?
-                Target.upsert(match_fields=["name", "type"], name=target, type=type)
+                for target in target_names
+            ]
+
+            Target.upsert_bulk(
+                match_fields=["name"], name=target_names, type=target_types
+            )
+
+            # Get all the upserted objects
+            targets = Target.filter(name__in=target_names)
+
+            # Update experiment with targets
+            experiment = Experiment.get(self.experiment_id)
+            if experiment:
+                experiment.targets = targets
+                experiment.save()
 
         return pd.concat([X_transformed, y], axis=1), transformer
 
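The per-row Feature.upsert and Target.upsert loops are replaced with single upsert_bulk calls, and the upserted targets are re-attached to the experiment. upsert_bulk is the package's own helper; a generic sketch of the same idea with plain SQLAlchemy and MySQL's INSERT ... ON DUPLICATE KEY UPDATE, assuming a unique key on name:

    # Generic bulk-upsert sketch (MySQL dialect); illustrates the pattern
    # the new code adopts, not the package's upsert_bulk implementation.
    from sqlalchemy.dialects.mysql import insert

    def upsert_features_bulk(session, feature_table, names, types):
        # One multi-row statement instead of len(names) round trips.
        rows = [{"name": n, "type": t} for n, t in zip(names, types)]
        stmt = insert(feature_table).values(rows)
        stmt = stmt.on_duplicate_key_update(type=stmt.inserted.type)
        session.execute(stmt)
        session.commit()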
@@ -41,7 +41,7 @@ from lecrapaud.directories import tmp_dir, clean_directory
 from lecrapaud.utils import logger
 from lecrapaud.config import PYTHON_ENV
 from lecrapaud.db import (
-    Dataset,
+    Experiment,
     Target,
     Feature,
     FeatureSelection,
@@ -54,8 +54,8 @@ from lecrapaud.search_space import all_models
 warnings.filterwarnings("ignore", category=FutureWarning)
 
 
-def load_train_data(dataset_dir, target_number, target_type="regression"):
-    data_dir = f"{dataset_dir}/data"
+def load_train_data(experiment_dir, target_number, target_type="regression"):
+    data_dir = f"{experiment_dir}/data"
 
     logger.info("Loading data...")
     train = joblib.load(f"{data_dir}/train.pkl")
@@ -74,8 +74,8 @@ def load_train_data(experiment_dir, target_number, target_type="regression"):
 
 
 class FeatureSelectionEngine:
-    def __init__(self, train, dataset, target_number, target_clf, **kwargs):
-        self.dataset = dataset
+    def __init__(self, train, experiment, target_number, target_clf, **kwargs):
+        self.experiment = experiment
         self.train = train
         self.target_number = target_number
         self.target_clf = target_clf
@@ -83,16 +83,16 @@ class FeatureSelectionEngine:
         self.target_type = (
             "classification" if self.target_number in self.target_clf else "regression"
         )
-        self.percentile = self.dataset.percentile
-        self.corr_threshold = self.dataset.corr_threshold
-        self.max_features = self.dataset.max_features
-
-        self.dataset_dir = self.dataset.path
-        self.dataset_id = self.dataset.id
-        self.data_dir = f"{self.dataset_dir}/data"
-        self.preprocessing_dir = f"{self.dataset_dir}/preprocessing"
+        self.percentile = self.experiment.percentile
+        self.corr_threshold = self.experiment.corr_threshold
+        self.max_features = self.experiment.max_features
+
+        self.experiment_dir = self.experiment.path
+        self.experiment_id = self.experiment.id
+        self.data_dir = f"{self.experiment_dir}/data"
+        self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
         self.fs_dir_target = (
-            f"{self.dataset_dir}/{f"TARGET_{self.target_number}"}/feature_selection"
+            f"{self.experiment_dir}/{f"TARGET_{self.target_number}"}/feature_selection"
         )
         os.makedirs(self.fs_dir_target, exist_ok=True)
 
@@ -119,12 +119,14 @@ class FeatureSelectionEngine:
         max_features = self.max_features
 
         feature_selection = FeatureSelection.upsert(
-            match_fields=["target_id", "dataset_id"],
+            match_fields=["target_id", "experiment_id"],
             target_id=target.id,
-            dataset_id=self.dataset_id,
+            experiment_id=self.experiment_id,
         )
 
-        if feature_selection.best_features_path:
+        if feature_selection.best_features_path and os.path.exists(
+            feature_selection.best_features_path
+        ):
             return joblib.load(feature_selection.best_features_path)
 
         self.X = self.train.loc[:, ~self.train.columns.str.contains("^TARGET_")]
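The added os.path.exists check covers the case where the FeatureSelection row survives in the database but its cached pickle under tmp_dir has been deleted; 0.5.1 would crash in joblib.load. The guard pattern in isolation:

    # Sketch of the cache-validation guard mirrored from the hunk above.
    import os
    import joblib

    def load_cached(path):
        """Return the cached object, or None to signal 'recompute'."""
        if path and os.path.exists(path):
            return joblib.load(path)
        return None  # DB row exists but the artifact is gone: recompute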
@@ -767,7 +769,7 @@ class PreprocessModel:
         train,
         val,
         test,
-        dataset,
+        experiment,
         target_numbers,
         target_clf,
         models_idx,
@@ -777,7 +779,10 @@ class PreprocessModel:
         date_column,
         **kwargs,
     ):
-        self.dataset = dataset
+        self.train = train
+        self.val = val
+        self.test = test
+        self.experiment = experiment
         self.target_numbers = target_numbers
         self.target_clf = target_clf
         self.models_idx = models_idx
@@ -786,13 +791,16 @@ class PreprocessModel:
         self.group_column = group_column
         self.date_column = date_column
 
-        self.dataset_dir = dataset.path
-        self.data_dir = f"{self.dataset_dir}/data"
-        self.preprocessing_dir = f"{self.dataset_dir}/preprocessing"
+        self.experiment_dir = experiment.path
+        self.data_dir = f"{self.experiment_dir}/data"
+        self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
 
-        self.all_features = dataset.get_all_features(
+        self.all_features = experiment.get_all_features(
             date_column=date_column, group_column=group_column
         )
+
+    def run(self):
+        # save data
         columns_to_keep = self.all_features + [
             f"TARGET_{i}" for i in self.target_numbers
         ]
@@ -801,15 +809,9 @@ class PreprocessModel:
         ]
         if duplicates:
             raise ValueError(f"Doublons détectés dans columns_to_keep: {duplicates}")
-
-        self.train = train[columns_to_keep]
-        if isinstance(val, pd.DataFrame):
-            self.val = val[columns_to_keep]
-        if isinstance(test, pd.DataFrame):
-            self.test = test[columns_to_keep]
-
-    def run(self):
-        # save data
+        self.train = self.train[columns_to_keep]
+        self.val = self.val[columns_to_keep]
+        self.test = self.test[columns_to_keep]
         joblib.dump(self.train, f"{self.data_dir}/train.pkl")
         joblib.dump(self.val, f"{self.data_dir}/val.pkl")
         joblib.dump(self.test, f"{self.data_dir}/test.pkl")
@@ -870,8 +872,11 @@ class PreprocessModel:
 
     def inference(self):
         # self.train is new data here
+        columns_to_keep = self.all_features
+        self.train = self.train[columns_to_keep]
+
         scaler_x = joblib.load(f"{self.preprocessing_dir}/scaler_x.pkl")
-        scaled_data = scaler_x.transform(self.train)
+        scaled_data = scaler_x.transform(self.train)  # TODO: utiliser scale_data
         scaled_data = pd.DataFrame(
             scaled_data, columns=self.train.columns, index=self.train.index
         )
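inference() now restricts incoming data to the training feature set before applying the persisted scaler, since a fitted scikit-learn scaler expects the same columns in the same order as at fit time. A hedged sketch of that alignment with explicit error reporting; the function and its missing-column check are illustrative, while all_features and the scaler path mirror the diff:

    # Illustrative column alignment before reusing a persisted scaler.
    import joblib
    import pandas as pd

    def scale_for_inference(new_data: pd.DataFrame, all_features, preprocessing_dir):
        missing = [c for c in all_features if c not in new_data.columns]
        if missing:
            raise ValueError(f"Inference data is missing features: {missing}")
        X = new_data[all_features]  # same columns, same order as training
        scaler_x = joblib.load(f"{preprocessing_dir}/scaler_x.pkl")
        return pd.DataFrame(scaler_x.transform(X), columns=X.columns, index=X.index)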
@@ -1012,7 +1017,7 @@ class PreprocessModel:
         df_reshaped = df.apply(list, axis=1).apply(lambda x: [list(x)])
         df_reshaped = pd.concat([df_reshaped, group_series], axis=1)
 
-        logger.info("Grouping method stock and creating timesteps...")
+        logger.info("Grouping features and creating timesteps...")
         df_reshaped = (
             df_reshaped.groupby(group_column)[0]
             .apply(lambda x: shiftsum(x, timesteps))
@@ -1058,13 +1063,13 @@ class PreprocessModel:
 def feature_selection_analysis(feature_selection_id: int, n_components: int = 5):
 
     feature_selection = FeatureSelection.get(feature_selection_id)
-    dataset_dir = feature_selection.dataset.path
+    experiment_dir = feature_selection.experiment.path
     features = [f.name for f in feature_selection.features]
     target = feature_selection.target.name
     target_number = target.split("_")[1]
 
     train, val, train_scaled, val_scaled, _scaler_y = load_train_data(
-        dataset_dir, target_number, target_type=feature_selection.target.type
+        experiment_dir, target_number, target_type=feature_selection.target.type
     )
     train = train[features + [target]]
     train_scaled = train_scaled[features + [target]]
lecrapaud/jobs/tasks.py CHANGED
@@ -2,7 +2,7 @@ from lecrapaud.jobs import app
 
 # from honeybadger import honeybadger
 from lecrapaud.send_daily_emails import send_daily_emails
-from lecrapaud.config import DATASET_ID, RECEIVER_EMAIL
+from lecrapaud.config import EXPERIMENT_ID, RECEIVER_EMAIL
 from lecrapaud.training import run_training
 from lecrapaud.constants import stock_list_3
 from lecrapaud.search_space import get_models_idx
@@ -18,9 +18,9 @@ from lecrapaud.search_space import get_models_idx
 def task_send_daily_emails(self):
     try:
         print(f"[Attempt #{self.request.retries}] task_send_daily_emails")
-        dataset_id = int(DATASET_ID)
+        experiment_id = int(EXPERIMENT_ID)
         email = RECEIVER_EMAIL
-        return send_daily_emails(email, dataset_id)
+        return send_daily_emails(email, experiment_id)
     except Exception as e:
         print(e)
         # honeybadger.notify(e)
@@ -49,7 +49,7 @@ def task_training_experiment(self):
             perform_hyperoptimization=True,
             perform_crossval=False,
             preserve_model=False,
-            session_name="20y_stock_list_3_linear_xgb",
+            experiment_name="20y_stock_list_3_linear_xgb",
         )
     except Exception as e:
         print(e)