autogluon.timeseries 1.0.0b20231124__py3-none-any.whl → 1.0.0b20231125__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

@@ -5,6 +5,7 @@ import itertools
5
5
  import logging
6
6
  import reprlib
7
7
  from collections.abc import Iterable
8
+ from itertools import islice
8
9
  from pathlib import Path
9
10
  from typing import Any, List, Optional, Tuple, Type, Union
10
11
 
@@ -849,6 +850,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
849
850
  freq: Union[str, pd.DateOffset],
850
851
  agg_numeric: str = "mean",
851
852
  agg_categorical: str = "first",
853
+ num_cpus: int = -1,
854
+ chunk_size: int = 100,
852
855
  **kwargs,
853
856
  ) -> TimeSeriesDataFrame:
854
857
  """Convert each time series in the data frame to the given frequency.
@@ -858,6 +861,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
858
861
  1. Converting an irregularly-sampled time series to a regular time index.
859
862
  2. Aggregating time series data by downsampling (e.g., convert daily sales into weekly sales)
860
863
 
864
+ Standard ``df.groupby(...).resample(...)`` can be extremely slow for large datasets, so we parallelize this
865
+ operation across multiple CPU cores.
866
+
867
+
861
868
  Parameters
862
869
  ----------
863
870
  freq : Union[str, pd.DateOffset]
@@ -867,6 +874,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
867
874
  Aggregation method applied to numeric columns.
868
875
  agg_categorical : {"first", "last"}, default = "first"
869
876
  Aggregation method applied to categorical columns.
877
+ num_cpus : int, default = -1
878
+ Number of CPU cores used when resampling in parallel. Set to -1 to use all cores.
879
+ chunk_size : int, default = 100
880
+ Number of time series in a chunk assigned to each parallel worker.
870
881
  **kwargs
871
882
  Additional keyword arguments that will be passed to ``pandas.DataFrameGroupBy.resample``.
872
883
 
@@ -928,7 +939,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
928
939
  0 2020-12-31 10.0
929
940
  2021-12-31 26.0
930
941
  """
931
- if self.freq == pd.tseries.frequencies.to_offset(freq).freqstr:
942
+ offset = pd.tseries.frequencies.to_offset(freq)
943
+ if self.freq == offset.freqstr:
932
944
  return self
933
945
 
934
946
  # We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
@@ -939,9 +951,23 @@ class TimeSeriesDataFrame(pd.DataFrame):
939
951
  else:
940
952
  aggregation[col] = agg_categorical
941
953
 
942
- resampled_df = TimeSeriesDataFrame(
943
- self.groupby(level=ITEMID, sort=False).resample(freq, level=TIMESTAMP, **kwargs).agg(aggregation)
944
- )
954
+ def split_into_chunks(iterable: Iterable, size: int) -> Iterable[Iterable]:
955
+ # Based on https://stackoverflow.com/a/22045226/5497447
956
+ iterable = iter(iterable)
957
+ return iter(lambda: tuple(islice(iterable, size)), ())
958
+
959
+ def resample_chunk(chunk: Iterable[Tuple[str, pd.DataFrame]]) -> pd.DataFrame:
960
+ resampled_dfs = []
961
+ for item_id, df in chunk:
962
+ resampled_df = df.resample(offset, level=TIMESTAMP, **kwargs).agg(aggregation)
963
+ resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[ITEMID]))
964
+ return pd.concat(resampled_dfs)
965
+
966
+ # Resampling time for 1 item < overhead time for a single parallel job. Therefore, we group items into chunks
967
+ # so that the speedup from parallelization isn't dominated by the communication costs.
968
+ chunks = split_into_chunks(pd.DataFrame(self).groupby(level=ITEMID, sort=False), chunk_size)
969
+ resampled_chunks = Parallel(n_jobs=num_cpus)(delayed(resample_chunk)(chunk) for chunk in chunks)
970
+ resampled_df = TimeSeriesDataFrame(pd.concat(resampled_chunks))
945
971
  resampled_df.static_features = self.static_features
946
972
  return resampled_df
947
973
 
@@ -283,6 +283,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
283
283
  init_args = self._get_estimator_init_args()
284
284
 
285
285
  default_trainer_kwargs = {
286
+ "limit_val_batches": 3,
286
287
  "max_epochs": init_args["max_epochs"],
287
288
  "callbacks": init_args["callbacks"],
288
289
  "enable_progress_bar": False,
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.0.0b20231124'
2
+ __version__ = '1.0.0b20231125'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.0.0b20231124
3
+ Version: 1.0.0b20231125
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -50,9 +50,9 @@ Requires-Dist: utilsforecast <0.0.11,>=0.0.10
50
50
  Requires-Dist: tqdm <5,>=4.38
51
51
  Requires-Dist: orjson ~=3.9
52
52
  Requires-Dist: tensorboard <3,>=2.9
53
- Requires-Dist: autogluon.core[raytune] ==1.0.0b20231124
54
- Requires-Dist: autogluon.common ==1.0.0b20231124
55
- Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost] ==1.0.0b20231124
53
+ Requires-Dist: autogluon.core[raytune] ==1.0.0b20231125
54
+ Requires-Dist: autogluon.common ==1.0.0b20231125
55
+ Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost] ==1.0.0b20231125
56
56
  Provides-Extra: all
57
57
  Provides-Extra: tests
58
58
  Requires-Dist: pytest ; extra == 'tests'
@@ -1,14 +1,14 @@
1
- autogluon.timeseries-1.0.0b20231124-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.timeseries-1.0.0b20231125-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
3
3
  autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
4
4
  autogluon/timeseries/learner.py,sha256=HVfsoWTG3dXBCc7JbPfHCCYCMwL3zlrqHwLBG33MTJ8,9633
5
5
  autogluon/timeseries/predictor.py,sha256=sohEmnK0Z-sf7zhQRR6i7zTtuTigs0QXQrzhxKx8v9o,59016
6
6
  autogluon/timeseries/splitter.py,sha256=eghGwAAN2_cxGk5aJBILgjGWtLzjxJcytMy49gg_q18,3061
7
- autogluon/timeseries/version.py,sha256=-k59F7BtYG5KzVCW8NlMl325YMkn2027VY6iivBRmI4,90
7
+ autogluon/timeseries/version.py,sha256=d9yJ5IbELS1blBNVeuaYKDWdfnjJtkgwPmPd9R_0wec,90
8
8
  autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
9
9
  autogluon/timeseries/configs/presets_configs.py,sha256=1u6tbOKJdIRULYDu41dlJwXRNswWsjBDF0aR2YhyMQs,479
10
10
  autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
11
- autogluon/timeseries/dataset/ts_dataframe.py,sha256=gbYz6kwA6DRIPw2ijuWV4CneDPKQO_Zx6ildCSMfV2E,42929
11
+ autogluon/timeseries/dataset/ts_dataframe.py,sha256=PgOz-88hbxNnhbpp0DMJbGBdtM6wIB32YpPWdyROB1c,44424
12
12
  autogluon/timeseries/metrics/__init__.py,sha256=gzvHptT-UdvB26CLOoFIznaKT-5FDwuVO37gaYPp88o,1835
13
13
  autogluon/timeseries/metrics/abstract.py,sha256=-muJuc30zSqHYXNBYyGocL-4zT7bt4SRjW9ddWcCq9w,8069
14
14
  autogluon/timeseries/metrics/point.py,sha256=WdhUrKB0ilO_N9-jHljQBQOj8mDvlNCfwMAD0RO61kI,11277
@@ -26,7 +26,7 @@ autogluon/timeseries/models/ensemble/__init__.py,sha256=kFr11Gmt7lQJu9Rr8HuIPphQ
26
26
  autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py,sha256=tifETwmiEGt-YtQ9eNK7ojJ3fBvtFMUJvisbfkIJ7gw,3393
27
27
  autogluon/timeseries/models/ensemble/greedy_ensemble.py,sha256=3xYzg0CIe0U4l-HScVThb-q8wfKCmNB8SwRjRBMkCMU,7369
28
28
  autogluon/timeseries/models/gluonts/__init__.py,sha256=M8PV9ZE4WpteScMobXM6RH1Udb1AZiHHtj2g5GQL3TU,329
29
- autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=t6nyLTcvkLYh_xYhHQGu4UK-c7fqdYguQrqzJT2j9Oo,25563
29
+ autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=cdzWbJ36vnSIg5TxzRYaOedvtUipbvQLQbsUSfj43ZA,25599
30
30
  autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  autogluon/timeseries/models/gluonts/torch/models.py,sha256=7ktOy6MxEzD0ykhUwcVEufSjdQNwYadtInLN6cms4Ig,18322
32
32
  autogluon/timeseries/models/local/__init__.py,sha256=JyckWWgMG1BTIWJqFTW6e1O-eb0LPPOwtXwmb1ErohQ,756
@@ -48,11 +48,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=MsqIHY14m3QMjSwwtE7Uo1oNwepWU
48
48
  autogluon/timeseries/utils/datetime/lags.py,sha256=kcU4liKbHj7KP2ajNU-KLZ8OYSU35EgT4kJjZNSw0Zg,5875
49
49
  autogluon/timeseries/utils/datetime/seasonality.py,sha256=kgK_ukw2wCviEB7CZXRVC5HZpBJZu9IsRrvCJ9E_rOE,755
50
50
  autogluon/timeseries/utils/datetime/time_features.py,sha256=pROkYyxETQ8rHKfPGhf2paB73C7rWJ2Ui0cCswLqbBg,2562
51
- autogluon.timeseries-1.0.0b20231124.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
52
- autogluon.timeseries-1.0.0b20231124.dist-info/METADATA,sha256=S1f5aKQr741Y-QzUCMNAH6W1PEp5e7LNRSvz3g6SKbQ,13324
53
- autogluon.timeseries-1.0.0b20231124.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
54
- autogluon.timeseries-1.0.0b20231124.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
55
- autogluon.timeseries-1.0.0b20231124.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
56
- autogluon.timeseries-1.0.0b20231124.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
57
- autogluon.timeseries-1.0.0b20231124.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
58
- autogluon.timeseries-1.0.0b20231124.dist-info/RECORD,,
51
+ autogluon.timeseries-1.0.0b20231125.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
52
+ autogluon.timeseries-1.0.0b20231125.dist-info/METADATA,sha256=eFmR7JN0SI0xEjL_5c6lQRxNuhHNz8ClyNr0F46Od-w,13324
53
+ autogluon.timeseries-1.0.0b20231125.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
54
+ autogluon.timeseries-1.0.0b20231125.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
55
+ autogluon.timeseries-1.0.0b20231125.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
56
+ autogluon.timeseries-1.0.0b20231125.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
57
+ autogluon.timeseries-1.0.0b20231125.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
58
+ autogluon.timeseries-1.0.0b20231125.dist-info/RECORD,,