notionhelper 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionhelper-0.3.2/ML_DEMO_README.md +315 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/PKG-INFO +1 -1
- notionhelper-0.3.2/examples/ml_demo.py +391 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/pyproject.toml +1 -1
- {notionhelper-0.3.1 → notionhelper-0.3.2}/uv.lock +1 -1
- {notionhelper-0.3.1 → notionhelper-0.3.2}/.coverage +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/.github/workflows/claude-code-review.yml +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/.github/workflows/claude.yml +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/.gitignore +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/GETTING_STARTED.md +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/README.md +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/helper_logo.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/json_builder.png.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/logo.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/notionh3.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/pillio.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/images/pillio2.png +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/notionapi_md_info.md +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/pytest.ini +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/src/notionhelper/__init__.py +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/src/notionhelper/helper.py +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/tests/README.md +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/tests/__init__.py +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/tests/conftest.py +0 -0
- {notionhelper-0.3.1 → notionhelper-0.3.2}/tests/test_helper.py +0 -0
@@ -0,0 +1,315 @@
# NotionHelper ML Demo Guide

## Overview

`ml_demo.py` is a comprehensive demonstration of how to use **NotionHelper** to track machine learning experiments. It showcases a complete workflow from model training to Notion integration.

## Features

✨ **Complete ML Pipeline**
- Logistic Regression on sklearn's breast cancer dataset
- Train/test split with stratification
- Feature scaling
- Comprehensive metrics calculation

📊 **Metrics Tracked**
- Accuracy
- Precision
- Recall
- F1 Score
- ROC AUC
- Training/test sample sizes

📈 **Visualizations**
- Confusion Matrix (heatmap)
- ROC Curve with AUC score
- Feature Importance (when scaling is disabled)

💾 **Artifacts**
- Predictions CSV with probabilities
- Classification report
- All generated plots

## Quick Start

### 1. Run the Demo (without Notion)

```bash
python ml_demo.py
```

This will:
- Train the model
- Generate all metrics and plots
- Save artifacts to disk
- Show instructions for Notion integration

### 2. Set Up Notion Integration

#### A. Get Your Notion API Token

1. Go to [Notion Integrations](https://www.notion.so/my-integrations)
2. Create a new integration
3. Copy the "Internal Integration Token"
4. Set it as an environment variable:

```bash
export NOTION_TOKEN='secret_your_token_here'
```
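
The demo script reads this variable with `os.getenv`. A minimal fail-fast sketch you can reuse in your own scripts (the error message is illustrative):

```python
import os

# Abort early if the Notion token is missing.
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
if not NOTION_TOKEN:
    raise SystemExit("NOTION_TOKEN is not set. Run: export NOTION_TOKEN='secret_...'")
```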

#### B. Create a Parent Page

1. Create a new page in Notion (this will hold your ML experiment databases)
2. Share the page with your integration
3. Copy the page ID from the URL:
   - URL: `https://www.notion.so/My-ML-Experiments-abc123def456...`
   - Page ID: `abc123def456...`
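
If you do this often, note that the page ID is simply the last dash-separated segment of the URL, before any query string. A tiny helper you could write yourself (hypothetical, not part of NotionHelper):

```python
# Hypothetical helper (not part of NotionHelper): extract the page ID
# from a Notion URL by taking the final dash-separated segment.
def page_id_from_url(url: str) -> str:
    return url.rstrip("/").split("?")[0].rsplit("-", 1)[-1]

print(page_id_from_url("https://www.notion.so/My-ML-Experiments-abc123def456"))
# -> abc123def456
```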

#### C. Create the Database (First Time Only)

1. Open `ml_demo.py`
2. Find the "STEP 4A" section
3. Set `CREATE_NEW_DB = True` to enable the database creation code
4. Set `PARENT_PAGE_ID = "your_page_id_here"`
5. Run the script:

```bash
python ml_demo.py
```

6. **IMPORTANT**: Copy the `data_source_id` from the output!

Example output:
```
✓ Database created! Data Source ID: 2d2fdfd6-8a97-80ba-bdd6-000b787993a4
💡 Save this ID for future experiment logging!
```

#### D. Log Experiments

1. Set `CREATE_NEW_DB = False` to disable the database creation code (STEP 4A)
2. Set `DATA_SOURCE_ID = "your_data_source_id_from_step_C"`
3. Run experiments:

```bash
python ml_demo.py
```

Each run will:
- Create a new row in your Notion database
- Upload confusion matrix and ROC curve plots
- Attach CSV artifacts
- Compare metrics with previous runs
- Show 🏆 if it's a new best score (the underlying call is sketched below)
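
Under the hood, each run boils down to a single `log_ml_experiment()` call. A minimal sketch using the same arguments the demo passes, assuming the package-level import works and with placeholder config/metric values:

```python
import os
from notionhelper import NotionHelper  # assumed package-level import

nh = NotionHelper(os.getenv("NOTION_TOKEN"))

DATA_SOURCE_ID = "your_data_source_id_here"        # the ID you saved in step C
config = {"Experiment_Name": "Quick run", "Model": "Logistic Regression"}
metrics = {"Accuracy": 97.37, "F1_Score": 97.93}   # illustrative values

page_id = nh.log_ml_experiment(
    data_source_id=DATA_SOURCE_ID,
    config=config,                                 # hyperparameter dict
    metrics=metrics,                               # computed scores
    plots=["confusion_matrix.png", "roc_curve.png"],
    target_metric="F1_Score",                      # column that decides the 🏆
    higher_is_better=True,
    file_paths=["predictions.csv", "classification_report.csv"],
    file_property_name="Artifacts",
)
print(f"Logged run to page {page_id}")
```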

## Customization

### Hyperparameters

Modify the `config` dictionary in the `main()` function:

```python
config = {
    "Experiment_Name": "Your Experiment",
    "Model": "Logistic Regression",
    "C_Regularization": 10.0,  # Change this
    "Max_Iterations": 2000,    # Or this
    "Solver": "saga",          # Try different solvers
    "Penalty": "l1",           # L1 or L2 regularization
    "Feature_Scaling": True    # Enable/disable scaling
}
```

### Target Metric

Change which metric to optimize in the `log_ml_experiment()` call:

```python
page_id = nh.log_ml_experiment(
    ...
    target_metric="Accuracy",  # Or "Precision", "Recall", "F1_Score"
    higher_is_better=True,     # Higher scores are better
    ...
)
```
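
If you ever track a loss-style metric where lower is better, flip the flag so the best-run comparison points the right way. A sketch continuing the call above, using a hypothetical `Log_Loss` column for illustration:

```python
page_id = nh.log_ml_experiment(
    data_source_id=DATA_SOURCE_ID,
    config=config,
    metrics=metrics,                       # must include a "Log_Loss" entry
    plots=["confusion_matrix.png", "roc_curve.png"],
    target_metric="Log_Loss",              # hypothetical loss-style column
    higher_is_better=False,                # lower values count as the new best
    file_paths=["predictions.csv", "classification_report.csv"],
    file_property_name="Artifacts",
)
```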

## Example Workflow

### Experiment 1: Baseline
```python
config = {
    "C_Regularization": 1.0,
    "Penalty": "l2",
    "Solver": "lbfgs"
}
# Results: F1 Score = 98.61%
```

### Experiment 2: Stronger Regularization
```python
config = {
    "C_Regularization": 0.1,  # Smaller C = stronger regularization
    "Penalty": "l2",
    "Solver": "lbfgs"
}
# Run to see if it improves performance
```

### Experiment 3: L1 Regularization
```python
config = {
    "C_Regularization": 1.0,
    "Penalty": "l1",  # Switch to L1
    "Solver": "saga"  # L1 requires saga or liblinear
}
# L1 can perform feature selection by driving coefficients to zero
```

## Generated Files

After running the demo, you'll find:

```
├── confusion_matrix.png        # Confusion matrix heatmap
├── roc_curve.png               # ROC curve with AUC
├── feature_importance.png      # Feature coefficients (if no scaling)
├── predictions.csv             # Test set predictions
└── classification_report.csv   # Detailed metrics per class
```

## Notion Database Schema

The created database will have columns for:

**Config Fields:**
- Experiment_Name (Title)
- Model
- Dataset
- Test_Size
- Random_State
- C_Regularization (Number)
- Max_Iterations (Number)
- Solver
- Penalty
- Feature_Scaling (Checkbox) ✅

**Metric Fields:**
- Accuracy (Number)
- Precision (Number)
- Recall (Number)
- F1_Score (Number)
- ROC_AUC (Number)
- Train_Samples (Number)
- Test_Samples (Number)
- Run Status (shows 🏆 for new best)

**Artifacts:**
- Plots (embedded in page body)
- Artifacts (attached CSV files)

## Troubleshooting

### Boolean Properties Showing as Numbers

If you see boolean values (like `Feature_Scaling`) appearing as numbers in Notion:

1. Check the debug output in the console
2. Ensure you're passing Python `bool` types (not 0/1 integers)
3. The `dict_to_notion_schema()` function includes debug prints to help diagnose (see the sketch below for why the distinction is easy to get wrong)
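
The usual culprit is that `bool` is a subclass of `int` in Python, so any type mapping that tests for numbers before booleans will classify `True`/`False` as numbers. An illustrative sketch of the correct ordering (not NotionHelper's actual implementation):

```python
def notion_property_type(value):
    # bool must be checked BEFORE int/float: isinstance(True, int) is True,
    # so a number check done first would swallow booleans.
    if isinstance(value, bool):
        return "checkbox"
    if isinstance(value, (int, float)):
        return "number"
    return "rich_text"

print(notion_property_type(True))  # checkbox
print(notion_property_type(1))     # number
```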

### Notion API Errors

Common issues:
- **401 Unauthorized**: Check your NOTION_TOKEN
- **404 Not Found**: Verify your PARENT_PAGE_ID or DATA_SOURCE_ID
- **400 Bad Request**: Make sure the page is shared with your integration

### Missing Plots

Ensure matplotlib and seaborn are installed:
```bash
pip install matplotlib seaborn
```

## Advanced Usage

### Use Your Own Dataset

Replace the data loading section:

```python
# Replace this:
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')

# With your own data:
df = pd.read_csv('your_data.csv')
X = df.drop('target_column', axis=1)
y = df['target_column']
```
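
If your target column holds text labels rather than 0/1, encode it before splitting; a sketch using sklearn's `LabelEncoder` (the file and column names are placeholders):

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('your_data.csv')  # placeholder file name
le = LabelEncoder()
y = pd.Series(le.fit_transform(df['target_column']), name='target')
X = df.drop('target_column', axis=1)

# Keep the mapping so predicted labels can be decoded later.
print(dict(zip(le.classes_, le.transform(le.classes_))))
```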

### Add More Metrics

Calculate additional metrics:

```python
from sklearn.metrics import matthews_corrcoef, balanced_accuracy_score

metrics = {
    ...
    "MCC": round(matthews_corrcoef(y_test, y_pred), 4),
    "Balanced_Accuracy": round(balanced_accuracy_score(y_test, y_pred) * 100, 2)
}
```

### Grid Search Integration

Combine with sklearn's GridSearchCV:

```python
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1.0, 10.0],
    'penalty': ['l1', 'l2']
}

# Use the saga solver so both the l1 and l2 penalties are supported.
grid = GridSearchCV(LogisticRegression(solver='saga', max_iter=2000), param_grid, cv=5)
grid.fit(X_train, y_train)

# Log each configuration
for params, mean_score in zip(grid.cv_results_['params'],
                              grid.cv_results_['mean_test_score']):
    config.update(params)
    metrics['CV_Score'] = round(float(mean_score), 4)
    nh.log_ml_experiment(...)
```

## Benefits of Using NotionHelper

✅ **Centralized Tracking**: All experiments in one place
✅ **Visual Comparison**: See which hyperparameters work best
✅ **Automatic Leaderboard**: Highlights new best scores
✅ **File Attachments**: Keep plots and CSVs with experiments
✅ **Team Collaboration**: Share results with your team
✅ **Reproducibility**: Track all hyperparameters and seeds

## Next Steps

1. **Run the demo** to familiarize yourself with the workflow
2. **Create your Notion database** following the setup guide
3. **Customize for your project** by swapping in your own ML model
4. **Run multiple experiments** with different hyperparameters
5. **Review results in Notion** to compare and analyze performance

## Support

For issues or questions:
- Check the NotionHelper source ([src/notionhelper/helper.py](src/notionhelper/helper.py))
- Review the [Notion API docs](https://developers.notion.com/)
- Examine the debug output for type checking issues

---

**Happy Experimenting! 🚀**

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: notionhelper
-Version: 0.3.1
+Version: 0.3.2
 Summary: NotionHelper is a Python library that simplifies interactions with the Notion API, enabling easy management of databases, pages, and files within Notion workspaces.
 Author-email: Jan du Plessis <drjanduplessis@icloud.com>
 Requires-Python: >=3.10

@@ -0,0 +1,391 @@
"""
NotionHelper ML Demo: Logistic Regression with sklearn
=======================================================
This demo showcases how to use NotionHelper to track ML experiments.

Features:
- Logistic regression on sklearn's breast cancer dataset
- Complete metrics tracking (accuracy, precision, recall, F1)
- Hyperparameter configuration
- Automatic Notion database creation
- Experiment logging with plots and artifacts
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    roc_curve,
    roc_auc_score
)
from sklearn.preprocessing import StandardScaler

from notionhelper import NotionHelper

def train_logistic_regression(
    test_size=0.2,
    random_state=42,
    C=1.0,
    max_iter=1000,
    solver='lbfgs',
    penalty='l2',
    scale_features=True
):
    """
    Train a logistic regression model on the breast cancer dataset.

    Parameters:
    -----------
    test_size : float
        Proportion of dataset to use for testing
    random_state : int
        Random seed for reproducibility
    C : float
        Inverse of regularization strength
    max_iter : int
        Maximum iterations for solver
    solver : str
        Algorithm to use in optimization
    penalty : str
        Regularization penalty type
    scale_features : bool
        Whether to standardize features

    Returns:
    --------
    metrics : dict
        Dictionary containing all evaluation metrics
    plot_paths : list
        List of paths to generated plots
    artifacts : list
        List of paths to saved artifacts (plots plus CSVs)
    """

    print("\n" + "="*60)
    print("🔬 NOTIONHELPER ML DEMO: Logistic Regression")
    print("="*60 + "\n")

    # 1. Load Dataset
    print("📊 Loading breast cancer dataset...")
    data = load_breast_cancer()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = pd.Series(data.target, name='target')

    print(f"   Dataset shape: {X.shape}")
    print(f"   Classes: {data.target_names}")
    print(f"   Features: {X.shape[1]}")

    # 2. Split Data
    print("\n🔀 Splitting data...")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    print(f"   Training set: {X_train.shape[0]} samples")
    print(f"   Test set: {X_test.shape[0]} samples")

    # 3. Feature Scaling (optional but recommended)
    if scale_features:
        print("\n⚖️ Scaling features...")
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # 4. Train Model
    print("\n🤖 Training Logistic Regression model...")
    model = LogisticRegression(
        C=C,
        max_iter=max_iter,
        solver=solver,
        penalty=penalty,
        random_state=random_state
    )
    model.fit(X_train, y_train)
    print("   ✓ Model trained successfully")

    # 5. Make Predictions
    print("\n🎯 Making predictions...")
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # 6. Calculate Metrics
    print("\n📈 Calculating metrics...")
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    metrics = {
        "Accuracy": round(accuracy * 100, 2),
        "Precision": round(precision * 100, 2),
        "Recall": round(recall * 100, 2),
        "F1_Score": round(f1 * 100, 2),
        "ROC_AUC": round(roc_auc * 100, 2),
        "Train_Samples": int(X_train.shape[0]),
        "Test_Samples": int(X_test.shape[0])
    }

    # Print metrics
    print("\n" + "="*60)
    print("📊 MODEL PERFORMANCE METRICS")
    print("-" * 60)
    print(f"Accuracy  : {metrics['Accuracy']:.2f}%")
    print(f"Precision : {metrics['Precision']:.2f}%")
    print(f"Recall    : {metrics['Recall']:.2f}%")
    print(f"F1 Score  : {metrics['F1_Score']:.2f}%")
    print(f"ROC AUC   : {metrics['ROC_AUC']:.2f}%")
    print("="*60 + "\n")

    # 7. Generate Visualizations
    print("📊 Generating visualizations...")
    plot_paths = []

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=data.target_names,
                yticklabels=data.target_names)
    plt.title('Confusion Matrix', fontsize=14, fontweight='bold')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    cm_path = 'confusion_matrix.png'
    plt.savefig(cm_path, dpi=150)
    plot_paths.append(cm_path)
    plt.close()
    print(f"   ✓ Saved: {cm_path}")

    # ROC Curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='#1f77b4', lw=2,
             label=f'ROC curve (AUC = {roc_auc:.3f})')
    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--',
             label='Random Classifier')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=12)
    plt.ylabel('True Positive Rate', fontsize=12)
    plt.title('ROC Curve', fontsize=14, fontweight='bold')
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.tight_layout()
    roc_path = 'roc_curve.png'
    plt.savefig(roc_path, dpi=150)
    plot_paths.append(roc_path)
    plt.close()
    print(f"   ✓ Saved: {roc_path}")

    # Feature Importance (Coefficients)
    if not scale_features:
        feature_importance = pd.DataFrame({
            'Feature': data.feature_names,
            'Coefficient': model.coef_[0]
        }).sort_values('Coefficient', key=abs, ascending=False).head(15)

        plt.figure(figsize=(10, 6))
        colors = ['#d62728' if x < 0 else '#2ca02c' for x in feature_importance['Coefficient']]
        plt.barh(feature_importance['Feature'], feature_importance['Coefficient'], color=colors)
        plt.xlabel('Coefficient Value', fontsize=12)
        plt.title('Top 15 Feature Importance (Logistic Regression Coefficients)',
                  fontsize=14, fontweight='bold')
        plt.grid(axis='x', alpha=0.3)
        plt.tight_layout()
        feat_path = 'feature_importance.png'
        plt.savefig(feat_path, dpi=150)
        plot_paths.append(feat_path)
        plt.close()
        print(f"   ✓ Saved: {feat_path}")

    # 8. Save Artifacts
    print("\n💾 Saving artifacts...")
    artifacts = []

    # Save predictions
    predictions_df = pd.DataFrame({
        'True_Label': y_test.values,
        'Predicted_Label': y_pred,
        # predict_proba[:, 1] is P(class 1), which is "benign" in this dataset
        'Probability_Benign': y_pred_proba,
        'Correct': (y_test.values == y_pred).astype(int)
    })
    pred_path = 'predictions.csv'
    predictions_df.to_csv(pred_path, index=False)
    artifacts.append(pred_path)
    print(f"   ✓ Saved: {pred_path}")

    # Save classification report
    report = classification_report(y_test, y_pred,
                                   target_names=data.target_names,
                                   output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    report_path = 'classification_report.csv'
    report_df.to_csv(report_path)
    artifacts.append(report_path)
    print(f"   ✓ Saved: {report_path}")

    # Combine plot paths and artifacts
    all_artifacts = plot_paths + artifacts

    return metrics, plot_paths, all_artifacts

def main():
    """
    Main function to demonstrate NotionHelper integration.
    """

    # ============================================================
    # STEP 1: Define Hyperparameters Configuration
    # ============================================================
    config = {
        "Experiment_Name": "Logistic Regression Demo",
        "Model": "Logistic Regression",
        "Dataset": "Breast Cancer (sklearn)",
        "Test_Size": 0.2,
        "Random_State": 42,
        "C_Regularization": 1.0,
        "Max_Iterations": 1000,
        "Solver": "lbfgs",
        "Penalty": "l2",
        "Feature_Scaling": True
    }

    # ============================================================
    # STEP 2: Train Model and Calculate Metrics
    # ============================================================
    metrics, plot_paths, artifacts = train_logistic_regression(
        test_size=config["Test_Size"],
        random_state=config["Random_State"],
        C=config["C_Regularization"],
        max_iter=config["Max_Iterations"],
        solver=config["Solver"],
        penalty=config["Penalty"],
        scale_features=config["Feature_Scaling"]
    )

    # ============================================================
    # STEP 3: Initialize NotionHelper
    # ============================================================
    print("\n" + "="*60)
    print("📝 NOTION INTEGRATION")
    print("="*60 + "\n")

    # IMPORTANT: Replace with your Notion API token
    NOTION_TOKEN = os.getenv("NOTION_TOKEN", "your_notion_token_here")

    if NOTION_TOKEN == "your_notion_token_here":
        print("⚠️ WARNING: Please set your NOTION_TOKEN environment variable")
        print("   Example: export NOTION_TOKEN='secret_...'")
        print("\n✅ Demo completed successfully (without Notion logging)")
        print("\n📁 Generated files:")
        for artifact in artifacts:
            print(f"   • {artifact}")
        return

    try:
        nh = NotionHelper(NOTION_TOKEN)
        print("✓ NotionHelper initialized successfully")

        # ============================================================
        # STEP 4A: Create New Database (First time only)
        # ============================================================
        # Set CREATE_NEW_DB to True on the first run, then back to False
        CREATE_NEW_DB = False
        PARENT_PAGE_ID = "your_page_id_here"

        if CREATE_NEW_DB:
            print("\n🗄️ Creating new Notion database...")
            data_source_id = nh.create_ml_database(
                parent_page_id=PARENT_PAGE_ID,
                db_title="ML Experiments - Logistic Regression Demo",
                config=config,
                metrics=metrics,
                file_property_name="Artifacts"
            )
            print("\n✅ Database created successfully!")
            print(f"📝 Data Source ID: {data_source_id}")
            print("\n" + "="*60)
            print("⚠️ CRITICAL: Complete these steps NOW!")
            print("="*60)
            print("\n1️⃣ Go to Notion and find the new database:")
            print("   'ML Experiments - Logistic Regression Demo'")
            print("\n2️⃣ Click '...' (top right) → Add connections")
            print("   → Select your integration")
            print("\n3️⃣ Save this Data Source ID:")
            print(f"   DATA_SOURCE_ID = \"{data_source_id}\"")
            print("\n4️⃣ Set CREATE_NEW_DB = False in this script")
            print("\n5️⃣ Run the script again to log experiments")
            print("="*60 + "\n")

            print("⏸️ Skipping experiment logging for this run.")
            print("   Complete steps above, then run again.")
            return  # Exit after database creation

        # This branch runs when CREATE_NEW_DB is False: log the experiment
        # to an existing database identified by DATA_SOURCE_ID.
        else:
            # Replace with your actual data source ID after creating the database
            DATA_SOURCE_ID = "your_data_source_id_here"

            if DATA_SOURCE_ID == "your_data_source_id_here":
                print("\n💡 To log experiments:")
                print("   1. Ensure CREATE_NEW_DB is False and DATA_SOURCE_ID is set.")
                print("   2. Make sure the database is shared with your integration.")
            else:
                print("\n🚀 Logging experiment to Notion...")
                page_id = nh.log_ml_experiment(
                    data_source_id=DATA_SOURCE_ID,
                    config=config,
                    metrics=metrics,
                    plots=plot_paths,
                    target_metric="F1_Score",
                    higher_is_better=True,
                    file_paths=artifacts,
                    file_property_name="Artifacts"
                )

                if page_id:
                    print("✓ Experiment logged successfully!")
                    print(f"   Page ID: {page_id}")
                else:
                    print("❌ Failed to log experiment")

    except Exception as e:
        print(f"❌ Notion API Error: {e}")
        print("   Continuing without Notion logging...")

    # ============================================================
    # FINAL SUMMARY
    # ============================================================
    print("\n" + "="*60)
    print("✅ DEMO COMPLETED SUCCESSFULLY")
    print("="*60)
    print("\n📁 Generated files:")
    for artifact in artifacts:
        print(f"   • {artifact}")

    print("\n📊 Key Metrics:")
    print(f"   • Accuracy: {metrics['Accuracy']:.2f}%")
    print(f"   • F1 Score: {metrics['F1_Score']:.2f}%")
    print(f"   • ROC AUC: {metrics['ROC_AUC']:.2f}%")

    print("\n🎉 Thank you for trying NotionHelper!")
    print("="*60 + "\n")


if __name__ == "__main__":
    main()

@@ -1,6 +1,6 @@
 [project]
 name = "notionhelper"
-version = "0.3.1"
+version = "0.3.2"
 description = "NotionHelper is a Python library that simplifies interactions with the Notion API, enabling easy management of databases, pages, and files within Notion workspaces."
 readme = "README.md"
 authors = [