dlm-murabei-package 1.0.0b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: dlm-murabei-package
3
+ Version: 1.0.0b0
4
+ Summary:
5
+ Author: Áurea Fonseca
6
+ Author-email: aurea.fonseca@murabei.com
7
+ Requires-Python: >=3.12
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Classifier: Programming Language :: Python :: 3.14
12
+ Requires-Dist: numpy (==2.3.2)
13
+ Requires-Dist: pandas (==2.3.1)
14
+ Requires-Dist: pyarrow (==21.0.0)
15
+ Requires-Dist: scipy (==1.16.1)
16
+ Requires-Dist: statsmodels (==0.14.5)
17
+ Requires-Dist: tqdm (==4.67.1)
18
+ Description-Content-Type: text/markdown
19
+
20
+ # murabei_models — Dynamic Linear Models (DLM)
21
+ Pacote Python para modelos DLM com regressoras exógenas, incluindo ajuste, previsão, atualização online e backtesting, voltado para séries temporais de negócios.
22
+
23
+ ## Instalação
24
+ - Fonte local (desenvolvimento):
25
+ - git clone <URL_DO_REPO>
26
+ - cd <PASTA_DO_REPO>
27
+ - pip install -r src/requirements/python_requirements.txt
28
+ - bash build.bash
29
+ - pip install dist/murabei_models-<VERSION>.tar.gz
30
+
31
+ - Requisitos principais (definidos no pyproject.toml e requirements):
32
+ - pandas==2.3.1
33
+ - numpy==2.3.2
34
+ - statsmodels==0.14.5
35
+ - scipy==1.16.1
36
+ - tqdm==4.67.1
37
+ - pyarrow==21.0.0
38
+ - dynm (via URL zip: https://github.com/EduardoGPinheiro/dynm/archive/refs/tags/v0.0.2.zip)
39
+
40
+ Observações:
41
+ - Garanta acesso à Internet para instalar a dependência dynm via URL.
42
+ - Python mínimo: 3.12 (definido em requires-python no pyproject.toml).
43
+
44
+ ## Exemplo rápido
45
+ Exemplo mínimo baseado em src/dlm/zzz__model_example.py com fit, predict, update e cross-validate:
46
+
47
+ ```python
48
+ import numpy as np
49
+ import pandas as pd
50
+ from dlm.model import DynamicLinearModel
51
+
52
+ # Dados sintéticos
53
+ np.random.seed(123)
54
+ n = 24
55
+ time = pd.date_range(start="2020-01-01", periods=n, freq="MS")
56
+ x1 = np.linspace(0, 1, n) + 0.1 * np.random.randn(n)
57
+ x2 = np.linspace(1, 0, n) + 0.1 * np.random.randn(n)
58
+ y = 0.7 * x1 - 0.4 * x2 + 0.1 * np.random.randn(n)
59
+
60
+ data = pd.DataFrame({"time": time, "y": y, "x1": x1, "x2": x2}).set_index("time")
61
+ y = data[["y"]]
62
+ X = data[["x1", "x2"]]
63
+
64
+ # Instancia e ajusta
65
+ dlm_model = DynamicLinearModel(
66
+ trend=False,
67
+ monitoring=True,
68
+ start_predictions_at=12,
69
+ )
70
+ dlm_model.fit(X=X, y=y)
71
+
72
+ # Predição
73
+ X_new = np.array([[1.15, 0.05], [1.20, 0.04], [1.25, 0.03]])
74
+ y_pred = dlm_model.predict(X=X_new)
75
+
76
+ # Atualização online
77
+ y_new = 0.72
78
+ X_new = np.array([[1.18, 0.06]])
79
+ dlm_model.update(y=y_new, X=X_new)
80
+
81
+ # Backtesting / Cross-validation
82
+ dlm_model.cross_validate(X=X, y=y, K=12)
83
+
84
+ # Acessos úteis (atributos/métodos)
85
+ _ = dlm_model.get_predictive
86
+ _ = dlm_model.get_params
87
+ _ = dlm_model.get_monitor
88
+ _ = dlm_model.summary
89
+ _ = dlm_model.llk
90
+ ```
91
+
92
+ ## Estrutura do projeto
93
+ - src/dlm/
94
+ - model.py: implementação principal do modelo DLM
95
+ - data/aux.py: utilitários de dados
96
+ - update/intervention.py: rotinas de atualização/intervenções
97
+ - utils/summary.py: sumários e utilidades
98
+ - zzz__model_example.py: exemplo completo de uso
99
+
100
+ - src/tests/
101
+ - test_template.py e data/00__data.parquet para reprodução local
102
+
103
+ - Empacotamento:
104
+ - Layout src/, MANIFEST.in e package_data incluindo arquivos .parquet (dlm.data.examples)
105
+
106
+ - Automação e scripts:
107
+ - bitbucket-pipelines.yml, build.sh, check_local_linter.bash
108
+
109
+ - Versionamento:
110
+ - VERSION (usado no setup.py)
111
+ - changelog.md
112
+
113
+ ## Desenvolvimento
114
+ - Estilo e lint:
115
+ - Ruff habilitado com: Pyflakes (F), pycodestyle (E, W), pydocstyle (D, convenção Google), isort (I), pandas-vet (PD), pep8-naming (N), bandit (S)
116
+ - Largura de linha: 79
117
+ - ignore: E402, N806, I001
118
+
119
+ - Fluxo sugerido:
120
+ - Criar venv e instalar localmente: pip install -e .
121
+ - Lint: ./check_local_linter.bash
122
+ - Testes: pytest em src/tests
123
+
124
+ ## Guia de uso
125
+ - Importação principal:
126
+ - from dlm.model import DynamicLinearModel
127
+
128
+ - Fluxo típico:
129
+ - Preparar X (regressoras) e y (alvo) como pandas/numpy
130
+ - Ajustar: model.fit(X=X, y=y)
131
+ - Prever: model.predict(X=X_new)
132
+ - Atualizar online: model.update(y=y_new, X=X_new)
133
+ - Backtesting: model.cross_validate(X=X, y=y, K=...)
134
+
135
+ - Atributos e diagnósticos:
136
+ - model.get_predictive, model.get_params, model.get_monitor, model.summary, model.llk
137
+
138
+ ## Autores
139
+ Áurea Fonseca, Eduardo Pinheiro, Izabel Nolau e Lucas Ribeiro.
140
+
141
+ ## Build local
142
+
@@ -0,0 +1,122 @@
1
+ # murabei_models — Dynamic Linear Models (DLM)
2
+ Pacote Python para modelos DLM com regressoras exógenas, incluindo ajuste, previsão, atualização online e backtesting, voltado para séries temporais de negócios.
3
+
4
+ ## Instalação
5
+ - Fonte local (desenvolvimento):
6
+ - git clone <URL_DO_REPO>
7
+ - cd <PASTA_DO_REPO>
8
+ - pip install -r src/requirements/python_requirements.txt
9
+ - bash build.bash
10
+ - pip install dist/murabei_models-<VERSION>.tar.gz
11
+
12
+ - Requisitos principais (definidos no pyproject.toml e requirements):
13
+ - pandas==2.3.1
14
+ - numpy==2.3.2
15
+ - statsmodels==0.14.5
16
+ - scipy==1.16.1
17
+ - tqdm==4.67.1
18
+ - pyarrow==21.0.0
19
+ - dynm (via URL zip: https://github.com/EduardoGPinheiro/dynm/archive/refs/tags/v0.0.2.zip)
20
+
21
+ Observações:
22
+ - Garanta acesso à Internet para instalar a dependência dynm via URL.
23
+ - Python mínimo: 3.12 (definido em requires-python no pyproject.toml).
24
+
25
+ ## Exemplo rápido
26
+ Exemplo mínimo baseado em src/dlm/zzz__model_example.py com fit, predict, update e cross-validate:
27
+
28
+ ```python
29
+ import numpy as np
30
+ import pandas as pd
31
+ from dlm.model import DynamicLinearModel
32
+
33
+ # Dados sintéticos
34
+ np.random.seed(123)
35
+ n = 24
36
+ time = pd.date_range(start="2020-01-01", periods=n, freq="MS")
37
+ x1 = np.linspace(0, 1, n) + 0.1 * np.random.randn(n)
38
+ x2 = np.linspace(1, 0, n) + 0.1 * np.random.randn(n)
39
+ y = 0.7 * x1 - 0.4 * x2 + 0.1 * np.random.randn(n)
40
+
41
+ data = pd.DataFrame({"time": time, "y": y, "x1": x1, "x2": x2}).set_index("time")
42
+ y = data[["y"]]
43
+ X = data[["x1", "x2"]]
44
+
45
+ # Instancia e ajusta
46
+ dlm_model = DynamicLinearModel(
47
+ trend=False,
48
+ monitoring=True,
49
+ start_predictions_at=12,
50
+ )
51
+ dlm_model.fit(X=X, y=y)
52
+
53
+ # Predição
54
+ X_new = np.array([[1.15, 0.05], [1.20, 0.04], [1.25, 0.03]])
55
+ y_pred = dlm_model.predict(X=X_new)
56
+
57
+ # Atualização online
58
+ y_new = 0.72
59
+ X_new = np.array([[1.18, 0.06]])
60
+ dlm_model.update(y=y_new, X=X_new)
61
+
62
+ # Backtesting / Cross-validation
63
+ dlm_model.cross_validate(X=X, y=y, K=12)
64
+
65
+ # Acessos úteis (atributos/métodos)
66
+ _ = dlm_model.get_predictive
67
+ _ = dlm_model.get_params
68
+ _ = dlm_model.get_monitor
69
+ _ = dlm_model.summary
70
+ _ = dlm_model.llk
71
+ ```
72
+
73
+ ## Estrutura do projeto
74
+ - src/dlm/
75
+ - model.py: implementação principal do modelo DLM
76
+ - data/aux.py: utilitários de dados
77
+ - update/intervention.py: rotinas de atualização/intervenções
78
+ - utils/summary.py: sumários e utilidades
79
+ - zzz__model_example.py: exemplo completo de uso
80
+
81
+ - src/tests/
82
+ - test_template.py e data/00__data.parquet para reprodução local
83
+
84
+ - Empacotamento:
85
+ - Layout src/, MANIFEST.in e package_data incluindo arquivos .parquet (dlm.data.examples)
86
+
87
+ - Automação e scripts:
88
+ - bitbucket-pipelines.yml, build.sh, check_local_linter.bash
89
+
90
+ - Versionamento:
91
+ - VERSION (usado no setup.py)
92
+ - changelog.md
93
+
94
+ ## Desenvolvimento
95
+ - Estilo e lint:
96
+ - Ruff habilitado com: Pyflakes (F), pycodestyle (E, W), pydocstyle (D, convenção Google), isort (I), pandas-vet (PD), pep8-naming (N), bandit (S)
97
+ - Largura de linha: 79
98
+ - ignore: E402, N806, I001
99
+
100
+ - Fluxo sugerido:
101
+ - Criar venv e instalar localmente: pip install -e .
102
+ - Lint: ./check_local_linter.bash
103
+ - Testes: pytest em src/tests
104
+
105
+ ## Guia de uso
106
+ - Importação principal:
107
+ - from dlm.model import DynamicLinearModel
108
+
109
+ - Fluxo típico:
110
+ - Preparar X (regressoras) e y (alvo) como pandas/numpy
111
+ - Ajustar: model.fit(X=X, y=y)
112
+ - Prever: model.predict(X=X_new)
113
+ - Atualizar online: model.update(y=y_new, X=X_new)
114
+ - Backtesting: model.cross_validate(X=X, y=y, K=...)
115
+
116
+ - Atributos e diagnósticos:
117
+ - model.get_predictive, model.get_params, model.get_monitor, model.summary, model.llk
118
+
119
+ ## Autores
120
+ Áurea Fonseca, Eduardo Pinheiro, Izabel Nolau e Lucas Ribeiro.
121
+
122
+ ## Build local
@@ -0,0 +1,74 @@
1
+ [project]
2
+ name = "dlm-murabei-package"
3
+ version = "1.0.0-b.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "Áurea Fonseca",email = "aurea.fonseca@murabei.com"},
7
+ {name = "Eduardo Pinheiro",email = "eduardo.pinheiro@murabei.com"},
8
+ {name = "Izabel Nolau",email = "izabel.souza@murabei.com"},
9
+ {name = "Lucas Ribeiro Magalhaes",email = "lucas.magalhaes@murabei.com"}
10
+ ]
11
+
12
+ readme = "README.md"
13
+ requires-python = ">=3.12"
14
+ dependencies = [
15
+ "pandas==2.3.1", "numpy==2.3.2", "statsmodels==0.14.5",
16
+ "scipy==1.16.1", "tqdm==4.67.1", "pyarrow==21.0.0"]
17
+
18
+ # "dynm @ https://github.com/EduardoGPinheiro/dynm/archive/refs/tags/v0.0.2.zip"
19
+
20
+
21
+ [tool.poetry]
22
+ packages = [{include = "dlm", from = "src"}]
23
+
24
+ [build-system]
25
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
26
+ build-backend = "poetry.core.masonry.api"
27
+
28
+ [tool.ruff]
29
+ line-length = 79 # Quantidade de caracteres que o linter irá considerar na linha
30
+ indent-width = 4
31
+
32
+ # Pastas onde o linter não atuará. Por padrão, ele só lê arquivos Python e ignora tudo o que está listado no .gitignore.
33
+ exclude = [
34
+ "secrets/",
35
+ "__init__.py",
36
+ "setup_template.py",
37
+ "setup.py"
38
+ ]
39
+
40
+ # Lista de linters que serão aplicados. O Ruff dá suporte a vários, e eles podem ser adicionados aqui.
41
+ [tool.ruff.lint]
42
+ select = [
43
+ "F", # Pyflakes - Procura bugs básicos no código
44
+ "E", # pycodestyle - Pep8
45
+ "W", # pycodestyle - Pep8
46
+ "D", # pydocstyle - Docstring
47
+ "I", # Isort - Imports
48
+ "PD", # pandas-vet - linter de pandas (não deixa usar inplace nem df como nome de variável)
49
+ "N", # pep8-naming - nomes seguindo pep8
50
+ "S", # flake8-bandit - linter de vulnerabilidade (não deixa usar assert)
51
+ ]
52
+
53
+ # Os códigos de erros colocados aqui são ignorados pelo linter. O erro abaixo é em relação ao assert. Está comentado por padrão.
54
+ ignore = [
55
+ # "S101" # Use of `assert` detected
56
+ "E402", "N806", "I001"
57
+ ]
58
+
59
+ # Ativa o modo preview do Ruff (regras e correções ainda experimentais)
60
+ preview = true
61
+
62
+ [tool.ruff.format]
63
+ indent-style = "space"
64
+
65
+ [tool.ruff.lint.pydocstyle]
66
+ convention = "google" # seleciona as docstrings do google como padrão
67
+
68
+ [tool.ruff.lint.isort]
69
+ no-sections = true
70
+
71
+ [dependency-groups]
72
+ docs = [
73
+ "mkdocs (>=1.6.1,<2.0.0)"
74
+ ]
File without changes
File without changes
@@ -0,0 +1,287 @@
1
+ """Auxiliary functions for data manipulation."""
2
+ import numpy as np
3
+ import pandas as pd
4
+ from pandas import DatetimeIndex
5
+ from typing import Union, Tuple, List
6
+
7
+
8
+ def custom_mean(y: np.array) -> np.ndarray | float:
9
+ """Compute the mean of an array with NaN handling.
10
+
11
+ This function computes the mean of a NumPy array while ignoring NaN values.
12
+ If all values are NaN, the result defaults to 0.
13
+ Works for both 1D and higher-dimensional arrays.
14
+
15
+ Args:
16
+ y (np.ndarray): Input array of numeric values
17
+ that may contain NaN values.
18
+
19
+ Returns:
20
+ float | np.ndarray:
21
+ - If `y` is 1D, returns a single float value representing the mean.
22
+ - If `y` has more than 1 dimension, returns an array with means
23
+ along axis=0, replacing columns that contain only NaN
24
+ values with 0.
25
+
26
+ Raises:
27
+ ValueError: If the input is not a NumPy array.
28
+ """
29
+ if not isinstance(y, np.ndarray):
30
+ raise ValueError("Input must be a NumPy array.")
31
+
32
+ # Case: 1D array
33
+ if y.ndim == 1:
34
+ # If all values are NaN, return 0
35
+ if np.isnan(y).all():
36
+ mu0 = 0
37
+ else:
38
+ # Compute mean ignoring NaN values
39
+ mu0 = np.nanmean(y)
40
+
41
+ # Case: multi-dimensional array
42
+ elif y.ndim > 1:
43
+ # Compute mean along axis=0 while ignoring NaN
44
+ mu0 = np.nanmean(y, axis=0)
45
+
46
+ # Identify columns where all values are NaN
47
+ all_nan_mask = np.isnan(y).all(axis=0)
48
+
49
+ # Replace NaN means with 0 for those columns
50
+ mu0[all_nan_mask] = 0
51
+
52
+ return mu0
53
+
54
+
55
+ def custom_std(y: np.ndarray) -> np.ndarray | float:
56
+ """Compute the standard deviation of an array with NaN handling.
57
+
58
+ This function computes the standard deviation of a NumPy array while
59
+ ignoring NaN values. If all values are NaN, the result defaults to 1.
60
+ To avoid zero-variance issues, the function enforces a minimum
61
+ return value of 1e-6.
62
+
63
+ Works for both 1D and multi-dimensional arrays.
64
+
65
+ Args:
66
+ y (np.ndarray): Input array of numeric values that may
67
+ contain NaN values.
68
+
69
+ Returns:
70
+ float | np.ndarray:
71
+ - If `y` is 1D, returns a single float value representing
72
+ the standard deviation.
73
+ - If `y` has more than 1 dimension, returns an array of
74
+ standard deviations computed along axis=0, replacing
75
+ columns of all-NaN values with 1.
76
+
77
+ Raises:
78
+ ValueError: If the input is not a NumPy array.
79
+ """
80
+ if not isinstance(y, np.ndarray):
81
+ raise ValueError("Input must be a NumPy array.")
82
+
83
+ # Case: 1D array
84
+ if y.ndim == 1:
85
+ # If all values are NaN, return 1.0
86
+ if np.isnan(y).all():
87
+ s0 = 1.0
88
+ else:
89
+ # Compute standard deviation ignoring NaN values
90
+ s0 = np.nanstd(y)
91
+
92
+ # Case: multi-dimensional array
93
+ elif y.ndim > 1:
94
+ # Compute std along axis=0 while ignoring NaN values
95
+ s0 = np.nanstd(y, axis=0)
96
+
97
+ # Identify columns where all values are NaN
98
+ all_nan_mask = np.isnan(y).all(axis=0)
99
+
100
+ # Replace std values of all-NaN columns with 1.0
101
+ s0[all_nan_mask] = 1.0
102
+
103
+ # Ensure minimum standard deviation is at least 1e-6
104
+ s0 = np.maximum(s0, 1e-6)
105
+
106
+ return s0
107
+
108
+
109
def identify_dummies(X: np.ndarray) -> np.ndarray:  # noqa: N803
    """Flag the columns of a 2D array that hold only the values 0 and 1.

    Every element is tested for membership in {0, 1}; a column is
    flagged when all of its elements pass that test.

    Args:
        X (np.ndarray): 2D array with one feature per column.

    Returns:
        np.ndarray: 1D boolean array of shape (n_features,). An entry is
            True when the matching column contains only 0s and 1s.

    Raises:
        ValueError: If X is not a NumPy array or is not 2-dimensional.
    """
    if not isinstance(X, np.ndarray):
        raise ValueError("X must be a NumPy ndarray.")
    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional (n_samples, n_features).")

    # Element-wise membership test, then collapse the rows so that each
    # column is summarized by a single boolean.
    is_binary = np.isin(X, [0, 1])
    return is_binary.all(axis=0)
139
+
140
+
141
def to_time_df(
    time: Union[np.ndarray, DatetimeIndex, List, None],
    start: int = 1
) -> pd.DataFrame:
    """Convert time-like input into a DataFrame with 'time' and 't'.

    The result has two columns: 'time' (the provided values) and 't'
    (a sequential counter that begins at `start`). If `time` is None,
    an empty DataFrame with only column 't' and zero rows is returned.

    Args:
        time (np.ndarray | DatetimeIndex | list | None): Time values to
            convert; must be one-dimensional when provided.
        start (int): First value of the 't' sequence; defaults to 1.

    Returns:
        pd.DataFrame: A DataFrame with columns:
            - 'time': The input time values (only when `time` is given).
            - 't': A sequence starting at `start` with step 1.

    Raises:
        TypeError: If `time` is not None and not an accepted type
            (np.ndarray, DatetimeIndex, or list).
        ValueError: If the provided `time` is not one-dimensional.
    """
    # No time information: return an empty frame that only has 't'.
    if time is None:
        return pd.DataFrame({"t": []})

    if not isinstance(time, (np.ndarray, list, DatetimeIndex)):
        raise TypeError(
            "`time` must be a np.ndarray, "
            "pandas.DatetimeIndex, list, or None.")

    # Validate dimensionality BEFORE wrapping in a Series: pandas
    # rejects 2D arrays with its own error and silently accepts nested
    # lists (as a Series of lists), so a check after the conversion
    # would never trigger.
    if isinstance(time, list):
        is_one_dimensional = not any(
            isinstance(value, (list, np.ndarray)) for value in time)
    else:
        is_one_dimensional = time.ndim == 1
    if not is_one_dimensional:
        raise ValueError("`time` must be one-dimensional.")

    # Build the output DataFrame with the time values.
    time_df = pd.DataFrame({"time": pd.Series(time)})

    # The frame carries a default 0-based index, so adding `start`
    # produces the counter without altering the index itself.
    time_df["t"] = time_df.index + start

    return time_df
201
+
202
+
203
def to_numpy(
    y: Union[np.ndarray, pd.Series, pd.DataFrame, float],
    X: Union[np.ndarray, pd.DataFrame, None]  # noqa: N803
) -> Tuple[
    np.ndarray, Union[np.ndarray, None], pd.DataFrame, Union[str, None]
]:
    """Convert targets and features to NumPy, returning time metadata.

    Converts `y` to a NumPy array and `X` (if provided) to a NumPy
    array. Also returns a time DataFrame built from `y.index` when that
    index is datetime-like, plus the inferred frequency when available.

    Args:
        y (np.ndarray | pd.Series | pd.DataFrame | float):
            Target variable; an array with at most 1 dimension, a
            Series, a single-column DataFrame, or a real-valued scalar
            (Python or NumPy int/float).
        X (np.ndarray | pd.DataFrame | None):
            Feature matrix; when it has more than one row, its number
            of rows must match `y`.

    Returns:
        Tuple[np.ndarray, np.ndarray | None, pd.DataFrame, str | None]:
            - y: NumPy array of targets.
            - X: NumPy array of features or None.
            - time_df: Output of `to_time_df` for the time values
              derived from `y.index` (or for None when unavailable).
            - freq: Inferred frequency string of the datetime index, or
              None when there is no time index or it cannot be
              inferred.

    Raises:
        TypeError: If `y` or `X` are of unsupported types.
        ValueError: If `y` is not 1D/single-column or if `X` rows
            mismatch `y`.
    """
    # Extract time from y.index when datetime-like; otherwise ignore.
    # `time` starts as None so that every branch below leaves it bound.
    time = None
    if isinstance(y, (pd.DataFrame, pd.Series)):
        index = y.index
        if pd.api.types.is_datetime64_any_dtype(index):
            time = pd.to_datetime(index, errors='coerce')
        elif not pd.api.types.is_numeric_dtype(index):
            # Best-effort conversion of other index types: use it only
            # when every label converts to a valid timestamp.
            try:
                time_converted = pd.to_datetime(index, errors='coerce')
                if time_converted.notna().all():
                    time = time_converted
            except Exception:
                # Deliberately broad, as in the original best-effort
                # logic; `time` stays None so processing can continue.
                print("y.index is not a valid datetime or could not be"
                      " converted. It will not be used as the time column.")
    time_df = to_time_df(time)

    # pd.infer_freq raises ValueError when given fewer than 3
    # timestamps; treat that as "no frequency" rather than failing on
    # short inputs.
    freq = None
    if time is not None:
        try:
            freq = pd.infer_freq(time)
        except ValueError:
            freq = None

    # Convert y to an ndarray.
    if isinstance(y, pd.DataFrame):
        if y.shape[1] != 1:
            raise ValueError("`y` should be a 1D array, Series,"
                             " or single-column DataFrame.")
        y = y.iloc[:, 0]
    if isinstance(y, pd.Series):
        y = y.to_numpy()
    elif isinstance(y, np.ndarray):
        if y.ndim > 1:
            raise ValueError("`y` must be 1D.")
    elif (isinstance(y, (int, float, np.integer, np.floating))
            and not isinstance(y, bool)):
        # Any real scalar becomes a one-element float array.
        y = np.array([y], dtype=float)
    else:
        raise TypeError("`y` must be a np.array, Series,"
                        " or single-column DataFrame.")

    # Convert X to ndarray if provided and validate row count.
    if X is not None:
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        elif not isinstance(X, np.ndarray):
            raise TypeError("`X` must be a np.array or pd.DataFrame.")

        # An X with at most one row is exempt from the row-count check,
        # exactly as in the original logic.
        # NOTE(review): presumably this supports single-step updates;
        # confirm against the callers.
        n_rows = X.shape[0]
        if n_rows > 1 and n_rows != y.shape[0]:
            raise ValueError("`X` and `y` must have the same number"
                             " of rows.")

    return y, X, time_df, freq