datfid 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datfid-0.1.21/LICENSE +21 -0
- datfid-0.1.21/PKG-INFO +155 -0
- datfid-0.1.21/README.md +129 -0
- datfid-0.1.21/datfid/__init__.py +1 -0
- datfid-0.1.21/datfid/client.py +320 -0
- datfid-0.1.21/datfid.egg-info/PKG-INFO +155 -0
- datfid-0.1.21/datfid.egg-info/SOURCES.txt +10 -0
- datfid-0.1.21/datfid.egg-info/dependency_links.txt +1 -0
- datfid-0.1.21/datfid.egg-info/requires.txt +3 -0
- datfid-0.1.21/datfid.egg-info/top_level.txt +1 -0
- datfid-0.1.21/setup.cfg +4 -0
- datfid-0.1.21/setup.py +38 -0
datfid-0.1.21/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 DATFID
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
datfid-0.1.21/PKG-INFO
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datfid
|
|
3
|
+
Version: 0.1.21
|
|
4
|
+
Summary: SDK to access the DATFID API hosted on Hugging Face Spaces
|
|
5
|
+
Author: DATFID
|
|
6
|
+
Author-email: igor.schapiro@datfid.com
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: requests>=2.31.0
|
|
14
|
+
Requires-Dist: pandas>=1.0.1
|
|
15
|
+
Requires-Dist: numpy<2.1,>=1.22
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: license
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
# DATFID SDK
|
|
28
|
+
|
|
29
|
+
A Python SDK to access the DATFID API to forecast your data.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Easy model fitting**: Build panel data models with time-dependent and static features.
|
|
34
|
+
- **Flexible lag handling**: Specify lags for the dependent variable and selected features.
|
|
35
|
+
- **Forecasting**: Generate future predictions with aligned timestamps and IDs.
|
|
36
|
+
- **Statistical options**: Filter features by significance and apply mean-variance tests.
|
|
37
|
+
- **White box full interpretability**: Get fully interpretable model with equation, estimated parameters, and standard errors.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install datfid
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from datfid import DATFIDClient
|
|
51
|
+
|
|
52
|
+
# Initialize the client with your DATFID token
|
|
53
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
54
|
+
|
|
55
|
+
# Fit a model
|
|
56
|
+
fit_result = client.fit_model(
|
|
57
|
+
df=dataframe,
|
|
58
|
+
id_col="name of id column",
|
|
59
|
+
time_col="name of time column",
|
|
60
|
+
y="name of dependent variable",
|
|
61
|
+
lag_y="starting lag : ending lag",
|
|
62
|
+
lagged_features={
|
|
63
|
+
"feature 1": "starting lag : ending lag",
|
|
64
|
+
"feature 2": "starting lag : ending lag"
|
|
65
|
+
},
|
|
66
|
+
current_features=["feature 3", "feature 4"],
|
|
67
|
+
filter_by_significance=True/False,
|
|
68
|
+
meanvar_test=True/False
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Generate forecasts
|
|
72
|
+
forecast_df = client.forecast_model(
|
|
73
|
+
df_forecast=dataframe
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# The forecast DataFrame contains the individual IDs and timestamps
|
|
77
|
+
# from the original data plus a "forecast" column with predicted values.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Example 1
|
|
81
|
+
|
|
82
|
+
Sample dataset from GitHub (Food and Beverages demand forecasting):
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import pandas as pd
|
|
86
|
+
from datfid import DATFIDClient
|
|
87
|
+
|
|
88
|
+
# Initialize the client with your DATFID token
|
|
89
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
90
|
+
|
|
91
|
+
# Load dataset for model fitting
|
|
92
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
|
|
93
|
+
df = pd.read_excel(url_fit)
|
|
94
|
+
|
|
95
|
+
# Fit the model
|
|
96
|
+
result = client.fit_model(df=df,
|
|
97
|
+
id_col="Product",
|
|
98
|
+
time_col="Time",
|
|
99
|
+
y="Revenue",
|
|
100
|
+
current_features='all',
|
|
101
|
+
filter_by_significance=True
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Load dataset for forecasting
|
|
105
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
|
|
106
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
107
|
+
|
|
108
|
+
# Forecast revenue using the fitted model
|
|
109
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Example 2
|
|
113
|
+
|
|
114
|
+
Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
import pandas as pd
|
|
118
|
+
from datfid import DATFIDClient
|
|
119
|
+
|
|
120
|
+
# Initialize the client with your DATFID token
|
|
121
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
122
|
+
|
|
123
|
+
# Load dataset for model fitting
|
|
124
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
|
|
125
|
+
df = pd.read_excel(url_fit)
|
|
126
|
+
|
|
127
|
+
# Fit the model
|
|
128
|
+
result = client.fit_model(df=df,
|
|
129
|
+
id_col="Individual",
|
|
130
|
+
time_col="Time",
|
|
131
|
+
y="Loan Probability",
|
|
132
|
+
lag_y="1:3",
|
|
133
|
+
lagged_features={"Income Level": "1:3"},
|
|
134
|
+
filter_by_significance=True)
|
|
135
|
+
|
|
136
|
+
# Load dataset for forecasting
|
|
137
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
|
|
138
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
139
|
+
|
|
140
|
+
# Forecast loan probability using the fitted model
|
|
141
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## API Reference
|
|
145
|
+
|
|
146
|
+
### DATFIDClient
|
|
147
|
+
|
|
148
|
+
#### `client = DATFIDClient(token: str)`
|
|
149
|
+
Initialize the client with your DATFID token.
|
|
150
|
+
|
|
151
|
+
#### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, int]] = None, current_features: Optional[list] = None, filter_by_significance: bool = False, meanvar_test: bool = False) -> SimpleNamespace`
|
|
152
|
+
Fit a model using the provided dataset.
|
|
153
|
+
|
|
154
|
+
#### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
|
|
155
|
+
Generate forecasts using the fitted model.
|
datfid-0.1.21/README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# DATFID SDK
|
|
2
|
+
|
|
3
|
+
A Python SDK to access the DATFID API to forecast your data.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Easy model fitting**: Build panel data models with time-dependent and static features.
|
|
8
|
+
- **Flexible lag handling**: Specify lags for the dependent variable and selected features.
|
|
9
|
+
- **Forecasting**: Generate future predictions with aligned timestamps and IDs.
|
|
10
|
+
- **Statistical options**: Filter features by significance and apply mean-variance tests.
|
|
11
|
+
- **White box full interpretability**: Get a fully interpretable model with its equation, estimated parameters, and standard errors.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install datfid
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from datfid import DATFIDClient
|
|
25
|
+
|
|
26
|
+
# Initialize the client with your DATFID token
|
|
27
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
28
|
+
|
|
29
|
+
# Fit a model
|
|
30
|
+
fit_result = client.fit_model(
|
|
31
|
+
df=dataframe,
|
|
32
|
+
id_col="name of id column",
|
|
33
|
+
time_col="name of time column",
|
|
34
|
+
y="name of dependent variable",
|
|
35
|
+
lag_y="starting lag : ending lag",
|
|
36
|
+
lagged_features={
|
|
37
|
+
"feature 1": "starting lag : ending lag",
|
|
38
|
+
"feature 2": "starting lag : ending lag"
|
|
39
|
+
},
|
|
40
|
+
current_features=["feature 3", "feature 4"],
|
|
41
|
+
filter_by_significance=True,  # or False
|
|
42
|
+
meanvar_test=False  # or True
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Generate forecasts
|
|
46
|
+
forecast_df = client.forecast_model(
|
|
47
|
+
df_forecast=dataframe
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# The forecast DataFrame contains the individual IDs and timestamps
|
|
51
|
+
# from the original data plus a "forecast" column with predicted values.
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Example 1
|
|
55
|
+
|
|
56
|
+
Sample dataset from GitHub (Food and Beverages demand forecasting):
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import pandas as pd
|
|
60
|
+
from datfid import DATFIDClient
|
|
61
|
+
|
|
62
|
+
# Initialize the client with your DATFID token
|
|
63
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
64
|
+
|
|
65
|
+
# Load dataset for model fitting
|
|
66
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
|
|
67
|
+
df = pd.read_excel(url_fit)
|
|
68
|
+
|
|
69
|
+
# Fit the model
|
|
70
|
+
result = client.fit_model(df=df,
|
|
71
|
+
id_col="Product",
|
|
72
|
+
time_col="Time",
|
|
73
|
+
y="Revenue",
|
|
74
|
+
current_features='all',
|
|
75
|
+
filter_by_significance=True
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Load dataset for forecasting
|
|
79
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
|
|
80
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
81
|
+
|
|
82
|
+
# Forecast revenue using the fitted model
|
|
83
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Example 2
|
|
87
|
+
|
|
88
|
+
Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import pandas as pd
|
|
92
|
+
from datfid import DATFIDClient
|
|
93
|
+
|
|
94
|
+
# Initialize the client with your DATFID token
|
|
95
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
96
|
+
|
|
97
|
+
# Load dataset for model fitting
|
|
98
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
|
|
99
|
+
df = pd.read_excel(url_fit)
|
|
100
|
+
|
|
101
|
+
# Fit the model
|
|
102
|
+
result = client.fit_model(df=df,
|
|
103
|
+
id_col="Individual",
|
|
104
|
+
time_col="Time",
|
|
105
|
+
y="Loan Probability",
|
|
106
|
+
lag_y="1:3",
|
|
107
|
+
lagged_features={"Income Level": "1:3"},
|
|
108
|
+
filter_by_significance=True)
|
|
109
|
+
|
|
110
|
+
# Load dataset for forecasting
|
|
111
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
|
|
112
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
113
|
+
|
|
114
|
+
# Forecast loan probability using the fitted model
|
|
115
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## API Reference
|
|
119
|
+
|
|
120
|
+
### DATFIDClient
|
|
121
|
+
|
|
122
|
+
#### `client = DATFIDClient(token: str)`
|
|
123
|
+
Initialize the client with your DATFID token.
|
|
124
|
+
|
|
125
|
+
#### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, int]] = None, current_features: Optional[list] = None, filter_by_significance: bool = False, meanvar_test: bool = False, signif: float = 0.05) -> SimpleNamespace`
|
|
126
|
+
Fit a model using the provided dataset.
|
|
127
|
+
|
|
128
|
+
#### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
|
|
129
|
+
Generate forecasts using the fitted model.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .client import DATFIDClient
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from typing import Dict, Any, Optional, Union
|
|
4
|
+
import json
|
|
5
|
+
from types import SimpleNamespace
|
|
6
|
+
import tempfile
|
|
7
|
+
import os
|
|
8
|
+
import gc
|
|
9
|
+
import psutil
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
# for nice output
|
|
13
|
+
class FitResult(SimpleNamespace):
    """Attribute-style container for one model-fit response.

    Keyword arguments become attributes. The list-valued fields ``alpha``,
    ``beta``, ``Performance`` and ``df`` are converted to pandas DataFrames
    on construction; any other value is stored unchanged.
    """

    # Row labels applied, in order, to the rows of ``alpha`` / ``beta``.
    _ROW4 = ["Estimate", "Standard Error", "T statistic", "P value"]
    # Row labels applied, in order, to the rows of ``Performance``.
    _PERF5 = ["R2 within", "R2 between", "R2 overall", "MSE", "MAE"]

    def _unique_ids(self):
        """Return the distinct values of ``df["ID"]`` as strings, one per row.

        An empty DataFrame is returned when ``df`` is missing, is not a
        DataFrame, or has no ``"ID"`` column.
        """
        frame = getattr(self, "df", None)
        if isinstance(frame, pd.DataFrame) and "ID" in frame.columns:
            return pd.DataFrame(frame["ID"].astype(str).unique())
        return pd.DataFrame([])

    # The three spellings are kept for backward compatibility; they are
    # exact synonyms.
    @property
    def id(self):
        """Distinct IDs found in ``df`` (see ``_unique_ids``)."""
        return self._unique_ids()

    @property
    def ID(self):
        """Alias of ``id``."""
        return self._unique_ids()

    @property
    def Id(self):
        """Alias of ``id``."""
        return self._unique_ids()

    @staticmethod
    def _labelled(rows_list, labels):
        """Build a DataFrame from ``rows_list`` with ``labels`` as row index.

        Non-list values are returned unchanged. If there are fewer rows than
        labels only the leading labels are used; if there are more, the extra
        rows are labelled with their position instead of raising, so an
        unexpectedly longer payload does not discard the whole result.
        """
        if not isinstance(rows_list, list):
            return rows_list
        count = len(rows_list)
        index = labels[:count] + list(range(len(labels), count))
        return pd.DataFrame(rows_list, index=index)

    @staticmethod
    def _df4(rows_list):
        """Label the rows of a coefficient table (``alpha`` / ``beta``)."""
        return FitResult._labelled(rows_list, FitResult._ROW4)

    @staticmethod
    def _df_perf(rows_list):
        """Label the rows of the ``Performance`` table."""
        return FitResult._labelled(rows_list, FitResult._PERF5)

    def __init__(self, **kwargs):
        # Convert list -> DataFrame for table-like fields before they are
        # stored as attributes.
        for field in ("alpha", "beta"):
            if field in kwargs:
                kwargs[field] = self._df4(kwargs[field])
        if "Performance" in kwargs:
            kwargs["Performance"] = self._df_perf(kwargs["Performance"])
        if isinstance(kwargs.get("df"), list):
            kwargs["df"] = pd.DataFrame(kwargs["df"])
        super().__init__(**kwargs)
|
|
61
|
+
|
|
62
|
+
class FitResultDict(dict):
    """Dictionary of fit results that can list its own keys as a table."""

    @property
    def id(self):
        """Return a one-column DataFrame (``"ID"``) holding the dict keys."""
        keys = list(self.keys())
        return pd.DataFrame({"ID": keys})

    # Alternative spellings of the same accessor.
    ID = id
    Id = id
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class DATFIDClient:
    """HTTP client for the DATFID model-fitting and forecasting API.

    Every request is sent to ``api_url`` with the caller's token in a bearer
    ``Authorization`` header.
    """

    def __init__(self, token: str, timeout: Optional[float] = None):
        """
        Create a client.

        Args:
            token: DATFID access token, sent as a bearer token
            timeout: Seconds to wait for each HTTP request. ``None`` (the
                default) waits indefinitely, which is the historical behaviour.
        """
        self.api_url = "https://datfid-org-datfid-master.hf.space/"
        self.headers = {"Authorization": f"Bearer {token}"}
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    def _cleanup_memory(self):
        """Clean up memory after operations (best effort, never raises)."""
        gc.collect()
        if hasattr(psutil, 'Process'):
            try:
                # The value is read and discarded.
                # NOTE(review): unclear that this has any effect beyond the
                # gc.collect() above — confirm whether it can be removed.
                psutil.Process().memory_info().rss
            except Exception:
                # Cleanup must not mask the result of the real operation, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                self.logger.debug("psutil memory probe failed", exc_info=True)

    @staticmethod
    def _frame_to_records(frame: pd.DataFrame) -> list:
        """Return ``frame`` as a list of row dicts with datetimes as strings.

        Works on a copy, so the caller's DataFrame is left untouched.
        """
        frame = frame.copy()
        for col in frame.columns:
            if pd.api.types.is_datetime64_any_dtype(frame[col]):
                frame[col] = frame[col].astype(str)
        return frame.to_dict(orient="records")

    def _get_json(self, path: str) -> Any:
        """GET ``api_url + path`` and return the decoded JSON body."""
        return requests.get(
            f"{self.api_url}{path}",
            headers=self.headers,
            timeout=self.timeout
        ).json()

    def _post_json(self, path: str, payload: Any, failure: str) -> Any:
        """POST ``payload`` as JSON and return the decoded JSON body.

        Raises:
            Exception: ``"<failure>: <response text>"`` on any non-200 status
        """
        response = requests.post(
            f"{self.api_url}{path}",
            json=payload,
            headers=self.headers,
            timeout=self.timeout
        )
        if response.status_code != 200:
            raise Exception(f"{failure}: {response.text}")
        return response.json()

    def _fit(
        self,
        path: str,
        df: pd.DataFrame,
        id_col: str,
        time_col: str,
        y: str,
        lag_y,
        lagged_features,
        current_features,
        filter_by_significance: bool,
        meanvar_test: bool,
        signif: float
    ) -> Any:
        """Build the fit payload, POST it to ``path`` and return the raw JSON."""
        payload = {
            "df": self._frame_to_records(df),
            "id_col": id_col,
            "time_col": time_col,
            "y": y,
            "lag_y": lag_y,
            "lagged_features": lagged_features or {},
            "current_features": current_features or [],
            "filter_by_significance": filter_by_significance,
            "meanvar_test": meanvar_test,
            "signif": signif
        }
        return self._post_json(path, payload, "Model fit failed")

    def _forecast(self, path: str, df_forecast: pd.DataFrame) -> pd.DataFrame:
        """POST the forecast rows to ``path`` and return the reply as a DataFrame."""
        try:
            records = self._frame_to_records(df_forecast)
            result = pd.DataFrame(
                self._post_json(path, records, "Forecast generation failed")
            )

            # Drop the serialised request before collecting garbage
            del records
            self._cleanup_memory()

            return result
        except Exception as e:
            self.logger.error(f"Forecast generation failed: {str(e)}")
            raise

    def ping(self):
        """Call the API root and return its decoded JSON reply.

        Failures are logged and re-raised.
        """
        try:
            response = self._get_json("")
            self._cleanup_memory()
            return response
        except Exception as e:
            self.logger.error(f"Ping failed: {str(e)}")
            raise

    def secure_ping(self):
        """Call the ``secure-ping/`` endpoint and return its decoded JSON reply.

        Failures are logged and re-raised.
        """
        try:
            response = self._get_json("secure-ping/")
            self._cleanup_memory()
            return response
        except Exception as e:
            self.logger.error(f"Secure ping failed: {str(e)}")
            raise

    def fit_model(
        self,
        df: pd.DataFrame,
        id_col: str,
        time_col: str,
        y: str,
        # "list[int]" is quoted so it is not evaluated when the class is
        # defined; subscripting the built-in list fails on Python < 3.9.
        lag_y: Optional[Union[int, str, "list[int]"]] = None,
        lagged_features: Optional[Dict[str, Union[int, str]]] = None,
        current_features: Optional[Union[list, str]] = None,
        filter_by_significance: bool = False,
        meanvar_test: bool = False,
        signif: float = 0.05
    ) -> FitResult:
        """
        Fit a model using the DATFID API.

        Args:
            df: DataFrame containing the data
            id_col: Name of the ID column
            time_col: Name of the time column
            y: Name of the target variable
            lag_y: Lags of the target variable; the README passes a
                "start:end" string such as "1:3". Sent to the API unchanged.
            lagged_features: Dictionary of features and their lag values
            current_features: List of current features to use (the README
                also passes the string 'all')
            filter_by_significance: Whether to filter features by significance
            meanvar_test: Whether to perform mean-variance test
            signif: Significance level (default 0.05), used when filter_by_significance is True

        Returns:
            FitResult (a SimpleNamespace) containing the model fit results

        Raises:
            Exception: if the API answers with a non-200 status
        """
        result_dict = self._fit(
            "modelfit/", df, id_col, time_col, y, lag_y, lagged_features,
            current_features, filter_by_significance, meanvar_test, signif
        )
        return FitResult(**result_dict)

    def forecast_model(
        self,
        df_forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Generate forecasts using the fitted model.

        No model identifier is sent with the request.
        NOTE(review): presumably the service remembers the last fit for this
        token — confirm.

        Args:
            df_forecast: DataFrame containing the forecast data

        Returns:
            DataFrame containing the forecast results

        Raises:
            Exception: if the API answers with a non-200 status (logged first)
        """
        return self._forecast("modelforecast/", df_forecast)

    def fit_model_ind(
        self,
        df: pd.DataFrame,
        id_col: str,
        time_col: str,
        y: str,
        # Quoted for the same reason as in fit_model.
        lag_y: Optional[Union[int, str, "list[int]"]] = None,
        lagged_features: Optional[Dict[str, Union[int, str]]] = None,
        current_features: Optional[Union[list, str]] = None,
        filter_by_significance: bool = False,
        meanvar_test: bool = False,
        signif: float = 0.05
    ) -> FitResultDict:
        """
        Fit a model individual by individual using the DATFID API.

        Args:
            df: DataFrame containing the data
            id_col: Name of the ID column
            time_col: Name of the time column
            y: Name of the target variable
            lag_y: Lags of the target variable; sent to the API unchanged
            lagged_features: Dictionary of features and their lag values
            current_features: List of current features to use
            filter_by_significance: Whether to filter features by significance
            meanvar_test: Whether to perform mean-variance test
            signif: Significance level (default 0.05), used when filter_by_significance is True

        Returns:
            FitResultDict mapping each ID (as a string) to its FitResult

        Raises:
            Exception: if the API answers with a non-200 status
        """
        raw = self._fit(
            "modelfit_ind/", df, id_col, time_col, y, lag_y, lagged_features,
            current_features, filter_by_significance, meanvar_test, signif
        )
        # Wrap each per-id result into a FitResult for dot access
        return FitResultDict({str(k): FitResult(**v) for k, v in raw.items()})

    def forecast_model_ind(
        self,
        df_forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Generate forecasts using the fitted individual by individual model.

        Args:
            df_forecast: DataFrame containing the forecast data

        Returns:
            DataFrame containing the forecast results

        Raises:
            Exception: if the API answers with a non-200 status (logged first)
        """
        return self._forecast("modelforecast_ind/", df_forecast)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datfid
|
|
3
|
+
Version: 0.1.21
|
|
4
|
+
Summary: SDK to access the DATFID API hosted on Hugging Face Spaces
|
|
5
|
+
Author: DATFID
|
|
6
|
+
Author-email: igor.schapiro@datfid.com
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: requests>=2.31.0
|
|
14
|
+
Requires-Dist: pandas>=1.0.1
|
|
15
|
+
Requires-Dist: numpy<2.1,>=1.22
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: license
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
# DATFID SDK
|
|
28
|
+
|
|
29
|
+
A Python SDK to access the DATFID API to forecast your data.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Easy model fitting**: Build panel data models with time-dependent and static features.
|
|
34
|
+
- **Flexible lag handling**: Specify lags for the dependent variable and selected features.
|
|
35
|
+
- **Forecasting**: Generate future predictions with aligned timestamps and IDs.
|
|
36
|
+
- **Statistical options**: Filter features by significance and apply mean-variance tests.
|
|
37
|
+
- **White box full interpretability**: Get fully interpretable model with equation, estimated parameters, and standard errors.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install datfid
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
Before using the SDK, please request an access token by emailing **admin@datfid.com** or by visiting our website [datfid.com](https://datfid.com).
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from datfid import DATFIDClient
|
|
51
|
+
|
|
52
|
+
# Initialize the client with your DATFID token
|
|
53
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
54
|
+
|
|
55
|
+
# Fit a model
|
|
56
|
+
fit_result = client.fit_model(
|
|
57
|
+
df=dataframe,
|
|
58
|
+
id_col="name of id column",
|
|
59
|
+
time_col="name of time column",
|
|
60
|
+
y="name of dependent variable",
|
|
61
|
+
lag_y="starting lag : ending lag",
|
|
62
|
+
lagged_features={
|
|
63
|
+
"feature 1": "starting lag : ending lag",
|
|
64
|
+
"feature 2": "starting lag : ending lag"
|
|
65
|
+
},
|
|
66
|
+
current_features=["feature 3", "feature 4"],
|
|
67
|
+
filter_by_significance=True/False,
|
|
68
|
+
meanvar_test=True/False
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Generate forecasts
|
|
72
|
+
forecast_df = client.forecast_model(
|
|
73
|
+
df_forecast=dataframe
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# The forecast DataFrame contains the individual IDs and timestamps
|
|
77
|
+
# from the original data plus a "forecast" column with predicted values.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Example 1
|
|
81
|
+
|
|
82
|
+
Sample dataset from GitHub (Food and Beverages demand forecasting):
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import pandas as pd
|
|
86
|
+
from datfid import DATFIDClient
|
|
87
|
+
|
|
88
|
+
# Initialize the client with your DATFID token
|
|
89
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
90
|
+
|
|
91
|
+
# Load dataset for model fitting
|
|
92
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages.xlsx"
|
|
93
|
+
df = pd.read_excel(url_fit)
|
|
94
|
+
|
|
95
|
+
# Fit the model
|
|
96
|
+
result = client.fit_model(df=df,
|
|
97
|
+
id_col="Product",
|
|
98
|
+
time_col="Time",
|
|
99
|
+
y="Revenue",
|
|
100
|
+
current_features='all',
|
|
101
|
+
filter_by_significance=True
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Load dataset for forecasting
|
|
105
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Food_Beverages_forecast.xlsx"
|
|
106
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
107
|
+
|
|
108
|
+
# Forecast revenue using the fitted model
|
|
109
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Example 2
|
|
113
|
+
|
|
114
|
+
Slightly larger sample dataset from GitHub (Banking sector, forecasting loan probability):
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
import pandas as pd
|
|
118
|
+
from datfid import DATFIDClient
|
|
119
|
+
|
|
120
|
+
# Initialize the client with your DATFID token
|
|
121
|
+
client = DATFIDClient(token="your_DATFID_token")
|
|
122
|
+
|
|
123
|
+
# Load dataset for model fitting
|
|
124
|
+
url_fit = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended.xlsx"
|
|
125
|
+
df = pd.read_excel(url_fit)
|
|
126
|
+
|
|
127
|
+
# Fit the model
|
|
128
|
+
result = client.fit_model(df=df,
|
|
129
|
+
id_col="Individual",
|
|
130
|
+
time_col="Time",
|
|
131
|
+
y="Loan Probability",
|
|
132
|
+
lag_y="1:3",
|
|
133
|
+
lagged_features={"Income Level": "1:3"},
|
|
134
|
+
filter_by_significance=True)
|
|
135
|
+
|
|
136
|
+
# Load dataset for forecasting
|
|
137
|
+
url_forecast = "https://raw.githubusercontent.com/datfid-valeriidashuk/sample-datasets/main/Banking_extended_forecast.xlsx"
|
|
138
|
+
df_forecast = pd.read_excel(url_forecast)
|
|
139
|
+
|
|
140
|
+
# Forecast loan probability using the fitted model
|
|
141
|
+
forecast = client.forecast_model(df_forecast=df_forecast)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## API Reference
|
|
145
|
+
|
|
146
|
+
### DATFIDClient
|
|
147
|
+
|
|
148
|
+
#### `client = DATFIDClient(token: str)`
|
|
149
|
+
Initialize the client with your DATFID token.
|
|
150
|
+
|
|
151
|
+
#### `client.fit_model(df: pd.DataFrame, id_col: str, time_col: str, y: str, lag_y: Optional[Union[int, str, list[int]]] = None, lagged_features: Optional[Dict[str, int]] = None, current_features: Optional[list] = None, filter_by_significance: bool = False, meanvar_test: bool = False) -> SimpleNamespace`
|
|
152
|
+
Fit a model using the provided dataset.
|
|
153
|
+
|
|
154
|
+
#### `client.forecast_model(df_forecast: pd.DataFrame) -> pd.DataFrame`
|
|
155
|
+
Generate forecasts using the fitted model.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
datfid
|
datfid-0.1.21/setup.cfg
ADDED
datfid-0.1.21/setup.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# to build:
# 1) open this file at level of datfid-sdk folder
# 2) change version in this file and save it
# 3) delete folder datfid.egg-info
# 4) delete older files from dist folder
# 5) in terminal: python setup.py sdist bdist_wheel
# 6) in terminal: twine upload --repository pypi dist/*
# 7) in hugging face delete older files from dist folder
# 8) in hugging face upload updated files
# 9) in terminal uninstall older version of datfid: pip uninstall datfid
# 10) in terminal install new version of datfid: pip install --index-url https://test.pypi.org/simple/ datfid
# NOTE(review): step 6 uploads to pypi but step 10 installs from test.pypi — confirm which index is intended.

# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="datfid",
    version="0.1.21",
    description="SDK to access the DATFID API hosted on Hugging Face Spaces",
    long_description=long_description,
    long_description_content_type="text/markdown",  # Important!
    author="DATFID",
    author_email="igor.schapiro@datfid.com",
    license="MIT",
    packages=find_packages(),
    install_requires=[
        "requests>=2.31.0",
        "pandas>=1.0.1",
        "numpy>=1.22, <2.1",
        # datfid/client.py imports psutil at module level, so it must be
        # installed together with the package.
        "psutil",
    ],
    # numpy>=1.22 is only published for Python 3.8 and newer, so 3.7 could
    # never resolve the dependencies above.
    python_requires=">=3.8",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
)
|