datfid 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datfid-0.1.21/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 DATFID
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
datfid-0.1.21/PKG-INFO ADDED
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: datfid
3
+ Version: 0.1.21
4
+ Summary: SDK to access the DATFID API hosted on Hugging Face Spaces
5
+ Author: DATFID
6
+ Author-email: igor.schapiro@datfid.com
7
+ License: MIT
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.7
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: requests>=2.31.0
14
+ Requires-Dist: pandas>=1.0.1
15
+ Requires-Dist: numpy<2.1,>=1.22
16
+ Dynamic: author
17
+ Dynamic: author-email
18
+ Dynamic: classifier
19
+ Dynamic: description
20
+ Dynamic: description-content-type
21
+ Dynamic: license
22
+ Dynamic: license-file
23
+ Dynamic: requires-dist
24
+ Dynamic: requires-python
25
+ Dynamic: summary
26
+
27
+ # DATFID SDK
28
+
29
+ A Python SDK to access the DATFID API to forecast your data.
30
+
31
+ ## Features
32
+
33
+ - **Easy model fitting**: Build panel data models with time-dependent and static features.
34
+ - **Flexible lag handling**: Specify lags for the dependent variable and selected features.
35
+ - **Forecasting**: Generate future predictions with aligned timestamps and IDs.
36
+ - **Statistical options**: Filter features by significance and apply mean-variance tests.
37
+ - **White-box, full interpretability**: Get a fully interpretable model with its equation, estimated parameters, and standard errors.
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install datfid
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
48
+
49
+ ```python
50
+ from datfid import DATFIDClient
51
+
52
+ # Initialize the client with your DATFID token
53
+ client = DATFIDClient(token="your_DATFID_token")
54
+
55
+ # Fit a model
56
+ fit_result = client.fit_model(
57
+ df=dataframe,
58
+ id_col="name of id column",
59
+ time_col="name of time column",
60
+ y="name of dependent variable",
61
+ lag_y="starting lag : ending lag",
62
+ lagged_features={
63
+ "feature 1": "starting lag : ending lag",
64
+ "feature 2": "starting lag : ending lag"
65
+ },
66
+ current_features=["feature 3", "feature 4"],
67
+ filter_by_significance=True,  # or False
68
+ meanvar_test=False  # or True
69
+ )
70
+
71
+ # Generate forecasts
72
+ forecast_df = client.forecast_model(
73
+ df_forecast=dataframe
74
+ )
75
+
76
+ # The forecast DataFrame contains the individual IDs and timestamps
77
+ # from the original data plus a "forecast" column with predicted values.
78
+ ```
79
+
80
+ ## Example 1
81
+
82
+ Sample dataset from GitHub (Food and Beverages demand forecasting):
83
+
84
+ ```python
85
+ import pandas as pd
86
+ from datfid import DATFIDClient
87
+
88
+ # Initialize the client with your DATFID token
89
+ client = DATFIDClient(token="your_DATFID_token")
90
+
91
+ # Load dataset for model fitting
92
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
93
+ df = pd.read_excel(url_fit)
94
+
95
+ # Fit the model
96
+ result = client.fit_model(df=df,
97
+ id_col="Product",
98
+ time_col="Time",
99
+ y="Revenue",
100
+ current_features='all',
101
+ filter_by_significance=True
102
+ )
103
+
104
+ # Load dataset for forecasting
105
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
106
+ df_forecast = pd.read_excel(url_forecast)
107
+
108
+ # Forecast revenue using the fitted model
109
+ forecast = client.forecast_model(df_forecast=df_forecast)
110
+ ```
111
+
112
+ ## Example 2
113
+
114
+ Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
115
+
116
+ ```python
117
+ import pandas as pd
118
+ from datfid import DATFIDClient
119
+
120
+ # Initialize the client with your DATFID token
121
+ client = DATFIDClient(token="your_DATFID_token")
122
+
123
+ # Load dataset for model fitting
124
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
125
+ df = pd.read_excel(url_fit)
126
+
127
+ # Fit the model
128
+ result = client.fit_model(df=df,
129
+ id_col="Individual",
130
+ time_col="Time",
131
+ y="Loan Probability",
132
+ lag_y="1:3",
133
+ lagged_features={"Income Level": "1:3"},
134
+ filter_by_significance=True)
135
+
136
+ # Load dataset for forecasting
137
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
138
+ df_forecast = pd.read_excel(url_forecast)
139
+
140
+ # Forecast loan probability using the fitted model
141
+ forecast = client.forecast_model(df_forecast=df_forecast)
142
+ ```
143
+
144
+ ## API Reference
145
+
146
+ ### DATFIDClient
147
+
148
+ #### `client = DATFIDClient(token: str)`
149
+ Initialize the client with your DATFID token.
150
+
151
+ #### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, Union[int, str]]] = None, current_features: Optional[Union[list, str]] = None, filter_by_significance: bool = False, meanvar_test: bool = False, signif: float = 0.05) -> SimpleNamespace`
152
+ Fit a model using the provided dataset.
153
+
154
+ #### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
155
+ Generate forecasts using the fitted model.
@@ -0,0 +1,129 @@
1
+ # DATFID SDK
2
+
3
+ A Python SDK to access the DATFID API to forecast your data.
4
+
5
+ ## Features
6
+
7
+ - **Easy model fitting**: Build panel data models with time-dependent and static features.
8
+ - **Flexible lag handling**: Specify lags for the dependent variable and selected features.
9
+ - **Forecasting**: Generate future predictions with aligned timestamps and IDs.
10
+ - **Statistical options**: Filter features by significance and apply mean-variance tests.
11
+ - **White-box, full interpretability**: Get a fully interpretable model with its equation, estimated parameters, and standard errors.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install datfid
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
22
+
23
+ ```python
24
+ from datfid import DATFIDClient
25
+
26
+ # Initialize the client with your DATFID token
27
+ client = DATFIDClient(token="your_DATFID_token")
28
+
29
+ # Fit a model
30
+ fit_result = client.fit_model(
31
+ df=dataframe,
32
+ id_col="name of id column",
33
+ time_col="name of time column",
34
+ y="name of dependent variable",
35
+ lag_y="starting lag : ending lag",
36
+ lagged_features={
37
+ "feature 1": "starting lag : ending lag",
38
+ "feature 2": "starting lag : ending lag"
39
+ },
40
+ current_features=["feature 3", "feature 4"],
41
+ filter_by_significance=True,  # or False
42
+ meanvar_test=False  # or True
43
+ )
44
+
45
+ # Generate forecasts
46
+ forecast_df = client.forecast_model(
47
+ df_forecast=dataframe
48
+ )
49
+
50
+ # The forecast DataFrame contains the individual IDs and timestamps
51
+ # from the original data plus a "forecast" column with predicted values.
52
+ ```
53
+
54
+ ## Example 1
55
+
56
+ Sample dataset from GitHub (Food and Beverages demand forecasting):
57
+
58
+ ```python
59
+ import pandas as pd
60
+ from datfid import DATFIDClient
61
+
62
+ # Initialize the client with your DATFID token
63
+ client = DATFIDClient(token="your_DATFID_token")
64
+
65
+ # Load dataset for model fitting
66
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
67
+ df = pd.read_excel(url_fit)
68
+
69
+ # Fit the model
70
+ result = client.fit_model(df=df,
71
+ id_col="Product",
72
+ time_col="Time",
73
+ y="Revenue",
74
+ current_features='all',
75
+ filter_by_significance=True
76
+ )
77
+
78
+ # Load dataset for forecasting
79
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
80
+ df_forecast = pd.read_excel(url_forecast)
81
+
82
+ # Forecast revenue using the fitted model
83
+ forecast = client.forecast_model(df_forecast=df_forecast)
84
+ ```
85
+
86
+ ## Example 2
87
+
88
+ Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
89
+
90
+ ```python
91
+ import pandas as pd
92
+ from datfid import DATFIDClient
93
+
94
+ # Initialize the client with your DATFID token
95
+ client = DATFIDClient(token="your_DATFID_token")
96
+
97
+ # Load dataset for model fitting
98
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
99
+ df = pd.read_excel(url_fit)
100
+
101
+ # Fit the model
102
+ result = client.fit_model(df=df,
103
+ id_col="Individual",
104
+ time_col="Time",
105
+ y="Loan Probability",
106
+ lag_y="1:3",
107
+ lagged_features={"Income Level": "1:3"},
108
+ filter_by_significance=True)
109
+
110
+ # Load dataset for forecasting
111
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
112
+ df_forecast = pd.read_excel(url_forecast)
113
+
114
+ # Forecast loan probability using the fitted model
115
+ forecast = client.forecast_model(df_forecast=df_forecast)
116
+ ```
117
+
118
+ ## API Reference
119
+
120
+ ### DATFIDClient
121
+
122
+ #### `client = DATFIDClient(token: str)`
123
+ Initialize the client with your DATFID token.
124
+
125
+ #### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, Union[int, str]]] = None, current_features: Optional[Union[list, str]] = None, filter_by_significance: bool = False, meanvar_test: bool = False, signif: float = 0.05) -> SimpleNamespace`
126
+ Fit a model using the provided dataset.
127
+
128
+ #### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
129
+ Generate forecasts using the fitted model.
@@ -0,0 +1 @@
1
+ from .client import DATFIDClient
@@ -0,0 +1,320 @@
1
+ import requests
2
+ import pandas as pd
3
+ from typing import Dict, Any, Optional, Union
4
+ import json
5
+ from types import SimpleNamespace
6
+ import tempfile
7
+ import os
8
+ import gc
9
+ import psutil
10
+ import logging
11
+
12
+ # for nice output
13
class FitResult(SimpleNamespace):
    """Attribute-style container for the fields of one model-fit response.

    Keyword arguments become attributes. A few table-like fields are
    converted on construction when they arrive as plain lists:

    * ``alpha`` and ``beta`` become DataFrames whose rows are labelled
      with the leading entries of ``_ROW4``.
    * ``Performance`` becomes a DataFrame whose rows are labelled with the
      leading entries of ``_PERF5``.
    * ``df`` becomes a DataFrame with default labels.

    Values that are not lists are stored untouched.
    """

    # Row labels for the ``alpha`` / ``beta`` tables.
    _ROW4 = ["Estimate", "Standard Error", "T statistic", "P value"]
    # Row labels for the ``Performance`` table.
    _PERF5 = ["R2 within", "R2 between", "R2 overall", "MSE", "MAE"]

    def _unique_ids(self):
        """Return the distinct values of ``df["ID"]`` (as strings) in a DataFrame.

        An empty DataFrame is returned when there is no ``df`` attribute,
        when it is not a DataFrame, or when it has no ``"ID"`` column.
        """
        frame = getattr(self, "df", None)
        if isinstance(frame, pd.DataFrame) and "ID" in frame.columns:
            return pd.DataFrame(frame["ID"].astype(str).unique())
        return pd.DataFrame([])

    # The same read-only view is exposed under three spellings.
    id = property(_unique_ids, doc="Distinct string IDs found in ``df``.")
    ID = id
    Id = id

    @staticmethod
    def _labelled(rows_list, labels):
        """Wrap ``rows_list`` in a DataFrame indexed by the leading ``labels``.

        Non-list input is returned unchanged. As many labels are used as
        there are rows; pandas raises ``ValueError`` if there are more rows
        than labels.
        """
        if not isinstance(rows_list, list):
            return rows_list
        return pd.DataFrame(rows_list, index=labels[:len(rows_list)])

    @staticmethod
    def _df4(rows_list):
        """Label a coefficient table (``alpha`` / ``beta``) with ``_ROW4``."""
        return FitResult._labelled(rows_list, FitResult._ROW4)

    @staticmethod
    def _df_perf(rows_list):
        """Label a performance table with ``_PERF5``."""
        return FitResult._labelled(rows_list, FitResult._PERF5)

    def __init__(self, **kwargs):
        """Store ``kwargs`` as attributes, converting table-like lists first."""
        for key in ("alpha", "beta"):
            if key in kwargs:
                kwargs[key] = self._df4(kwargs[key])
        if "Performance" in kwargs:
            kwargs["Performance"] = self._df_perf(kwargs["Performance"])
        if isinstance(kwargs.get("df"), list):
            kwargs["df"] = pd.DataFrame(kwargs["df"])
        super().__init__(**kwargs)
61
+
62
class FitResultDict(dict):
    """Dictionary of per-ID fit results with a convenience listing of its keys."""

    def _ids(self):
        """Return the dictionary keys as a one-column (``"ID"``) DataFrame."""
        return pd.DataFrame({"ID": list(self)})

    # The same read-only view is exposed under three spellings.
    id = property(_ids, doc="Keys of this mapping as a DataFrame column ``ID``.")
    ID = id
    Id = id
74
+
75
+
76
class DATFIDClient:
    """HTTP client for the DATFID API.

    Every request goes to ``api_url`` and carries a bearer-token
    ``Authorization`` header built from the token given at construction.
    """

    def __init__(self, token: str, timeout: Optional[float] = None):
        """Create a client.

        Args:
            token: DATFID access token, sent as ``Bearer <token>``.
            timeout: Seconds passed to ``requests`` as the per-request
                timeout. ``None`` (the default) waits indefinitely, which
                is what happened before this argument existed.
        """
        self.api_url = "https://datfid-org-datfid-master.hf.space/"
        self.headers = {"Authorization": f"Bearer {token}"}
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _cleanup_memory(self):
        """Run a garbage-collection pass and touch the process memory info."""
        gc.collect()
        try:
            # Best effort only: the value is read and discarded.
            psutil.Process().memory_info().rss
        except Exception:
            # Narrower than a bare ``except`` so Ctrl-C is not swallowed.
            pass

    @staticmethod
    def _to_records(df: pd.DataFrame) -> list:
        """Return ``df`` as a list of row dicts with datetime columns stringified.

        The caller's DataFrame is not modified; a copy is converted.
        """
        frame = df.copy()
        for col in frame.columns:
            if pd.api.types.is_datetime64_any_dtype(frame[col]):
                frame[col] = frame[col].astype(str)
        return frame.to_dict(orient="records")

    def _fit_payload(
        self,
        df,
        id_col,
        time_col,
        y,
        lag_y,
        lagged_features,
        current_features,
        filter_by_significance,
        meanvar_test,
        signif,
    ) -> dict:
        """Build the JSON body shared by both model-fit endpoints."""
        return {
            "df": self._to_records(df),
            "id_col": id_col,
            "time_col": time_col,
            "y": y,
            "lag_y": lag_y,
            "lagged_features": lagged_features or {},
            "current_features": current_features or [],
            "filter_by_significance": filter_by_significance,
            "meanvar_test": meanvar_test,
            "signif": signif,
        }

    def _post_json(self, endpoint: str, payload, failure_prefix: str):
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded reply.

        Raises:
            Exception: if the response status is not 200; the message is
                ``"<failure_prefix>: <response text>"``.
        """
        response = requests.post(
            f"{self.api_url}{endpoint}",
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise Exception(f"{failure_prefix}: {response.text}")
        return response.json()

    def _get_json(self, url: str, label: str):
        """GET ``url`` and return the decoded JSON; log and re-raise on failure."""
        try:
            reply = requests.get(
                url, headers=self.headers, timeout=self.timeout
            ).json()
            self._cleanup_memory()
            return reply
        except Exception as e:
            self.logger.error("%s failed: %s", label, e)
            raise

    def _forecast(self, endpoint: str, df_forecast: pd.DataFrame) -> pd.DataFrame:
        """Send ``df_forecast`` to a forecast endpoint and return the reply as a DataFrame."""
        try:
            records = self._to_records(df_forecast)
            result = pd.DataFrame(
                self._post_json(endpoint, records, "Forecast generation failed")
            )
            # Drop the request payload before the collection pass.
            del records
            self._cleanup_memory()
            return result
        except Exception as e:
            self.logger.error("Forecast generation failed: %s", e)
            raise

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def ping(self):
        """GET the API root and return the decoded JSON reply."""
        return self._get_json(self.api_url, "Ping")

    def secure_ping(self):
        """GET the ``secure-ping/`` endpoint and return the decoded JSON reply."""
        return self._get_json(f"{self.api_url}secure-ping/", "Secure ping")

    def fit_model(
        self,
        df: pd.DataFrame,
        id_col: str,
        time_col: str,
        y: str,
        lag_y: Optional[Union[int, str, list[int]]] = None,
        lagged_features: Optional[Dict[str, Union[int, str]]] = None,
        current_features: Optional[Union[list, str]] = None,
        filter_by_significance: bool = False,
        meanvar_test: bool = False,
        signif: float = 0.05
    ) -> "FitResult":
        """
        Fit a model using the DATFID API.

        Args:
            df: DataFrame containing the data
            id_col: Name of the ID column
            time_col: Name of the time column
            y: Name of the target variable
            lag_y: Lag specification for the target variable, forwarded
                to the API unchanged (the README uses strings such as "1:3")
            lagged_features: Dictionary of features and their lag values
            current_features: Current features to use (the README also
                passes the string 'all')
            filter_by_significance: Whether to filter features by significance
            meanvar_test: Whether to perform mean-variance test
            signif: Significance level (default 0.05), used when filter_by_significance is True

        Returns:
            FitResult (a SimpleNamespace) containing the model fit results

        Raises:
            Exception: if the API does not answer with status 200
        """
        payload = self._fit_payload(
            df, id_col, time_col, y, lag_y, lagged_features,
            current_features, filter_by_significance, meanvar_test, signif,
        )
        result_dict = self._post_json("modelfit/", payload, "Model fit failed")
        return FitResult(**result_dict)

    def forecast_model(
        self,
        df_forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Generate forecasts using the fitted model.

        Args:
            df_forecast: DataFrame containing the forecast data

        Returns:
            DataFrame containing the forecast results

        Raises:
            Exception: if the API does not answer with status 200
        """
        return self._forecast("modelforecast/", df_forecast)

    def fit_model_ind(
        self,
        df: pd.DataFrame,
        id_col: str,
        time_col: str,
        y: str,
        lag_y: Optional[Union[int, str, list[int]]] = None,
        lagged_features: Optional[Dict[str, Union[int, str]]] = None,
        current_features: Optional[Union[list, str]] = None,
        filter_by_significance: bool = False,
        meanvar_test: bool = False,
        signif: float = 0.05
    ) -> "FitResultDict":
        """
        Fit a model individual by individual using the DATFID API.

        Args:
            df: DataFrame containing the data
            id_col: Name of the ID column
            time_col: Name of the time column
            y: Name of the target variable
            lag_y: Lag specification for the target variable, forwarded
                to the API unchanged
            lagged_features: Dictionary of features and their lag values
            current_features: Current features to use
            filter_by_significance: Whether to filter features by significance
            meanvar_test: Whether to perform mean-variance test
            signif: Significance level (default 0.05), used when filter_by_significance is True

        Returns:
            FitResultDict mapping each ID (as a string) to its FitResult

        Raises:
            Exception: if the API does not answer with status 200
        """
        payload = self._fit_payload(
            df, id_col, time_col, y, lag_y, lagged_features,
            current_features, filter_by_significance, meanvar_test, signif,
        )
        raw = self._post_json("modelfit_ind/", payload, "Model fit failed")
        # Wrap each per-id result so its fields are reachable with dot access.
        return FitResultDict({str(k): FitResult(**v) for k, v in raw.items()})

    def forecast_model_ind(
        self,
        df_forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Generate forecasts using the fitted individual by individual model.

        Args:
            df_forecast: DataFrame containing the forecast data

        Returns:
            DataFrame containing the forecast results

        Raises:
            Exception: if the API does not answer with status 200
        """
        return self._forecast("modelforecast_ind/", df_forecast)
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: datfid
3
+ Version: 0.1.21
4
+ Summary: SDK to access the DATFID API hosted on Hugging Face Spaces
5
+ Author: DATFID
6
+ Author-email: igor.schapiro@datfid.com
7
+ License: MIT
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.7
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: requests>=2.31.0
14
+ Requires-Dist: pandas>=1.0.1
15
+ Requires-Dist: numpy<2.1,>=1.22
16
+ Dynamic: author
17
+ Dynamic: author-email
18
+ Dynamic: classifier
19
+ Dynamic: description
20
+ Dynamic: description-content-type
21
+ Dynamic: license
22
+ Dynamic: license-file
23
+ Dynamic: requires-dist
24
+ Dynamic: requires-python
25
+ Dynamic: summary
26
+
27
+ # DATFID SDK
28
+
29
+ A Python SDK to access the DATFID API to forecast your data.
30
+
31
+ ## Features
32
+
33
+ - **Easy model fitting**: Build panel data models with time-dependent and static features.
34
+ - **Flexible lag handling**: Specify lags for the dependent variable and selected features.
35
+ - **Forecasting**: Generate future predictions with aligned timestamps and IDs.
36
+ - **Statistical options**: Filter features by significance and apply mean-variance tests.
37
+ - **White-box, full interpretability**: Get a fully interpretable model with its equation, estimated parameters, and standard errors.
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install datfid
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
48
+
49
+ ```python
50
+ from datfid import DATFIDClient
51
+
52
+ # Initialize the client with your DATFID token
53
+ client = DATFIDClient(token="your_DATFID_token")
54
+
55
+ # Fit a model
56
+ fit_result = client.fit_model(
57
+ df=dataframe,
58
+ id_col="name of id column",
59
+ time_col="name of time column",
60
+ y="name of dependent variable",
61
+ lag_y="starting lag : ending lag",
62
+ lagged_features={
63
+ "feature 1": "starting lag : ending lag",
64
+ "feature 2": "starting lag : ending lag"
65
+ },
66
+ current_features=["feature 3", "feature 4"],
67
+ filter_by_significance=True,  # or False
68
+ meanvar_test=False  # or True
69
+ )
70
+
71
+ # Generate forecasts
72
+ forecast_df = client.forecast_model(
73
+ df_forecast=dataframe
74
+ )
75
+
76
+ # The forecast DataFrame contains the individual IDs and timestamps
77
+ # from the original data plus a "forecast" column with predicted values.
78
+ ```
79
+
80
+ ## Example 1
81
+
82
+ Sample dataset from GitHub (Food and Beverages demand forecasting):
83
+
84
+ ```python
85
+ import pandas as pd
86
+ from datfid import DATFIDClient
87
+
88
+ # Initialize the client with your DATFID token
89
+ client = DATFIDClient(token="your_DATFID_token")
90
+
91
+ # Load dataset for model fitting
92
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
93
+ df = pd.read_excel(url_fit)
94
+
95
+ # Fit the model
96
+ result = client.fit_model(df=df,
97
+ id_col="Product",
98
+ time_col="Time",
99
+ y="Revenue",
100
+ current_features='all',
101
+ filter_by_significance=True
102
+ )
103
+
104
+ # Load dataset for forecasting
105
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
106
+ df_forecast = pd.read_excel(url_forecast)
107
+
108
+ # Forecast revenue using the fitted model
109
+ forecast = client.forecast_model(df_forecast=df_forecast)
110
+ ```
111
+
112
+ ## Example 2
113
+
114
+ Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
115
+
116
+ ```python
117
+ import pandas as pd
118
+ from datfid import DATFIDClient
119
+
120
+ # Initialize the client with your DATFID token
121
+ client = DATFIDClient(token="your_DATFID_token")
122
+
123
+ # Load dataset for model fitting
124
+ url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
125
+ df = pd.read_excel(url_fit)
126
+
127
+ # Fit the model
128
+ result = client.fit_model(df=df,
129
+ id_col="Individual",
130
+ time_col="Time",
131
+ y="Loan Probability",
132
+ lag_y="1:3",
133
+ lagged_features={"Income Level": "1:3"},
134
+ filter_by_significance=True)
135
+
136
+ # Load dataset for forecasting
137
+ url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
138
+ df_forecast = pd.read_excel(url_forecast)
139
+
140
+ # Forecast loan probability using the fitted model
141
+ forecast = client.forecast_model(df_forecast=df_forecast)
142
+ ```
143
+
144
+ ## API Reference
145
+
146
+ ### DATFIDClient
147
+
148
+ #### `client = DATFIDClient(token: str)`
149
+ Initialize the client with your DATFID token.
150
+
151
+ #### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, Union[int, str]]] = None, current_features: Optional[Union[list, str]] = None, filter_by_significance: bool = False, meanvar_test: bool = False, signif: float = 0.05) -> SimpleNamespace`
152
+ Fit a model using the provided dataset.
153
+
154
+ #### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
155
+ Generate forecasts using the fitted model.
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ datfid/__init__.py
5
+ datfid/client.py
6
+ datfid.egg-info/PKG-INFO
7
+ datfid.egg-info/SOURCES.txt
8
+ datfid.egg-info/dependency_links.txt
9
+ datfid.egg-info/requires.txt
10
+ datfid.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+ requests>=2.31.0
2
+ pandas>=1.0.1
3
+ numpy<2.1,>=1.22
@@ -0,0 +1 @@
1
+ datfid
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
datfid-0.1.21/setup.py ADDED
@@ -0,0 +1,38 @@
1
"""Packaging script for the ``datfid`` SDK."""
from setuptools import setup, find_packages

# to build:
# 1) open this file at level of datfid-sdk folder
# 2) change version in this file and save it
# 3) delete folder datfid.egg-info
# 4) delete older files from dist folder
# 5) in terminal: python setup.py sdist bdist_wheel
# 6) in terminal: twine upload --repository pypi dist/*
# 7) in hugging face delete older files from dist folder
# 8) in hugging face upload updated files
# 9) in terminal uninstall older version of datfid: pip uninstall datfid
# 10) in terminal install new version of datfid: pip install --index-url https://test.pypi.org/simple/ datfid
# NOTE(review): step 6 uploads to pypi while step 10 installs from
# test.pypi -- confirm which index is intended.

# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="datfid",
    version="0.1.21",
    description="SDK to access the DATFID API hosted on Hugging Face Spaces",
    long_description=long_description,
    long_description_content_type="text/markdown",  # Important!
    author="DATFID",
    author_email="igor.schapiro@datfid.com",
    license="MIT",
    packages=find_packages(),
    install_requires=[
        "requests>=2.31.0",
        "pandas>=1.0.1",
        "numpy>=1.22, <2.1",
        # datfid/client.py imports psutil at module level, so it must be
        # installed together with the package.
        "psutil",
    ],
    # datfid/client.py subscripts the built-in ``list`` in annotations
    # that are evaluated at definition time, which needs Python 3.9+.
    python_requires=">=3.9",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
)