notionhelper 0.3.2__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {notionhelper-0.3.2 → notionhelper-0.4.1}/ML_DEMO_README.md +3 -1
  2. {notionhelper-0.3.2 → notionhelper-0.4.1}/PKG-INFO +44 -2
  3. {notionhelper-0.3.2 → notionhelper-0.4.1}/README.md +43 -1
  4. notionhelper-0.4.1/SEPARATION_SUMMARY.md +70 -0
  5. {notionhelper-0.3.2 → notionhelper-0.4.1}/examples/ml_demo.py +3 -3
  6. {notionhelper-0.3.2 → notionhelper-0.4.1}/pyproject.toml +1 -1
  7. notionhelper-0.4.1/src/notionhelper/__init__.py +4 -0
  8. {notionhelper-0.3.2 → notionhelper-0.4.1}/src/notionhelper/helper.py +118 -179
  9. notionhelper-0.4.1/src/notionhelper/ml_logger.py +206 -0
  10. notionhelper-0.3.2/src/notionhelper/__init__.py +0 -3
  11. {notionhelper-0.3.2 → notionhelper-0.4.1}/.coverage +0 -0
  12. {notionhelper-0.3.2 → notionhelper-0.4.1}/.github/workflows/claude-code-review.yml +0 -0
  13. {notionhelper-0.3.2 → notionhelper-0.4.1}/.github/workflows/claude.yml +0 -0
  14. {notionhelper-0.3.2 → notionhelper-0.4.1}/.gitignore +0 -0
  15. {notionhelper-0.3.2 → notionhelper-0.4.1}/GETTING_STARTED.md +0 -0
  16. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/helper_logo.png +0 -0
  17. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/json_builder.png.png +0 -0
  18. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/logo.png +0 -0
  19. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/notionh3.png +0 -0
  20. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/pillio.png +0 -0
  21. {notionhelper-0.3.2 → notionhelper-0.4.1}/images/pillio2.png +0 -0
  22. {notionhelper-0.3.2 → notionhelper-0.4.1}/notionapi_md_info.md +0 -0
  23. {notionhelper-0.3.2 → notionhelper-0.4.1}/pytest.ini +0 -0
  24. {notionhelper-0.3.2 → notionhelper-0.4.1}/tests/README.md +0 -0
  25. {notionhelper-0.3.2 → notionhelper-0.4.1}/tests/__init__.py +0 -0
  26. {notionhelper-0.3.2 → notionhelper-0.4.1}/tests/conftest.py +0 -0
  27. {notionhelper-0.3.2 → notionhelper-0.4.1}/tests/test_helper.py +0 -0
  28. {notionhelper-0.3.2 → notionhelper-0.4.1}/uv.lock +0 -0
@@ -2,7 +2,9 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- `ml_demo.py` is a comprehensive demonstration of how to use **NotionHelper** to track machine learning experiments. It showcases a complete workflow from model training to Notion integration.
5
+ `ml_demo.py` is a comprehensive demonstration of how to use **MLNotionHelper** (which extends NotionHelper) to track machine learning experiments. It showcases a complete workflow from model training to Notion integration.
6
+
7
+ **Note:** The ML experiment tracking features are available in the `MLNotionHelper` class, which inherits from `NotionHelper` and adds specialized methods for logging ML experiments.
6
8
 
7
9
  ## Features
8
10
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: notionhelper
3
- Version: 0.3.2
3
+ Version: 0.4.1
4
4
  Summary: NotionHelper is a Python library that simplifies interactions with the Notion API, enabling easy management of databases, pages, and files within Notion workspaces.
5
5
  Author-email: Jan du Plessis <drjanduplessis@icloud.com>
6
6
  Requires-Python: >=3.10
@@ -74,7 +74,7 @@ Here is an example of how to use the library:
74
74
 
75
75
  ```python
76
76
  import os
77
- from notionhelper import NotionHelper
77
+ from notionhelper import NotionHelper, MLNotionHelper
78
78
  ```
79
79
 
80
80
  ### Initialize the NotionHelper class
@@ -82,7 +82,11 @@ from notionhelper import NotionHelper
82
82
  ```python
83
83
  notion_token = os.getenv("NOTION_TOKEN")
84
84
 
85
+ # For core Notion operations
85
86
  helper = NotionHelper(notion_token)
87
+
88
+ # For ML experiment tracking (includes all NotionHelper methods)
89
+ ml_helper = MLNotionHelper(notion_token)
86
90
  ```
87
91
 
88
92
  ### Retrieve a Database (Container)
@@ -164,6 +168,44 @@ helper.append_page_body(page_id, blocks)
164
168
  print(f"Successfully appended content to page ID: {page_id}")
165
169
  ```
166
170
 
171
+ ### Retrieve a Page and Convert to Markdown
172
+
173
+ NotionHelper can retrieve page content and optionally convert it to markdown format for easy use in documents, blogs, or other applications.
174
+
175
+ #### Get Page as JSON (Default)
176
+
177
+ ```python
178
+ page_id = "your_page_id"
179
+ result = helper.get_page(page_id)
180
+ properties = result["properties"] # Page properties
181
+ content = result["content"] # List of block objects (JSON)
182
+ ```
183
+
184
+ #### Get Page as Markdown
185
+
186
+ ```python
187
+ page_id = "your_page_id"
188
+ result = helper.get_page(page_id, return_markdown=True)
189
+ properties = result["properties"] # Page properties
190
+ markdown_content = result["content"] # String in markdown format
191
+ print(markdown_content)
192
+ ```
193
+
194
+ The markdown conversion supports:
195
+ - **Headings** (H1, H2, H3)
196
+ - **Text formatting** (bold, italic, strikethrough, code, links)
197
+ - **Lists** (bulleted and numbered)
198
+ - **Code blocks** with language syntax highlighting
199
+ - **Images**
200
+ - **Dividers** and block quotes
201
+
202
+ This is useful for:
203
+ - Exporting Notion pages to markdown files
204
+ - Integrating with static site generators
205
+ - Creating blog posts from Notion content
206
+ - Storing content in version control
207
+ - Converting documentation to other formats
208
+
167
209
  ### Get all pages from a Data Source as a Pandas DataFrame
168
210
 
169
211
  ```python
@@ -47,7 +47,7 @@ Here is an example of how to use the library:
47
47
 
48
48
  ```python
49
49
  import os
50
- from notionhelper import NotionHelper
50
+ from notionhelper import NotionHelper, MLNotionHelper
51
51
  ```
52
52
 
53
53
  ### Initialize the NotionHelper class
@@ -55,7 +55,11 @@ from notionhelper import NotionHelper
55
55
  ```python
56
56
  notion_token = os.getenv("NOTION_TOKEN")
57
57
 
58
+ # For core Notion operations
58
59
  helper = NotionHelper(notion_token)
60
+
61
+ # For ML experiment tracking (includes all NotionHelper methods)
62
+ ml_helper = MLNotionHelper(notion_token)
59
63
  ```
60
64
 
61
65
  ### Retrieve a Database (Container)
@@ -137,6 +141,44 @@ helper.append_page_body(page_id, blocks)
137
141
  print(f"Successfully appended content to page ID: {page_id}")
138
142
  ```
139
143
 
144
+ ### Retrieve a Page and Convert to Markdown
145
+
146
+ NotionHelper can retrieve page content and optionally convert it to markdown format for easy use in documents, blogs, or other applications.
147
+
148
+ #### Get Page as JSON (Default)
149
+
150
+ ```python
151
+ page_id = "your_page_id"
152
+ result = helper.get_page(page_id)
153
+ properties = result["properties"] # Page properties
154
+ content = result["content"] # List of block objects (JSON)
155
+ ```
156
+
157
+ #### Get Page as Markdown
158
+
159
+ ```python
160
+ page_id = "your_page_id"
161
+ result = helper.get_page(page_id, return_markdown=True)
162
+ properties = result["properties"] # Page properties
163
+ markdown_content = result["content"] # String in markdown format
164
+ print(markdown_content)
165
+ ```
166
+
167
+ The markdown conversion supports:
168
+ - **Headings** (H1, H2, H3)
169
+ - **Text formatting** (bold, italic, strikethrough, code, links)
170
+ - **Lists** (bulleted and numbered)
171
+ - **Code blocks** with language syntax highlighting
172
+ - **Images**
173
+ - **Dividers** and block quotes
174
+
175
+ This is useful for:
176
+ - Exporting Notion pages to markdown files
177
+ - Integrating with static site generators
178
+ - Creating blog posts from Notion content
179
+ - Storing content in version control
180
+ - Converting documentation to other formats
181
+
140
182
  ### Get all pages from a Data Source as a Pandas DataFrame
141
183
 
142
184
  ```python
@@ -0,0 +1,70 @@
1
+ # ML Functions Separation - Implementation Summary
2
+
3
+ ## What Was Done
4
+
5
+ Successfully separated Machine Learning functions from the core NotionHelper class using an **inheritance-based approach**.
6
+
7
+ ### File Changes
8
+
9
+ #### 1. **Created: `src/notionhelper/ml_logger.py`** (NEW)
10
+ - New `MLNotionHelper` class that **inherits from `NotionHelper`**
11
+ - Moved ML-specific methods:
12
+ - `log_ml_experiment()` - Logs experiments with metrics, plots, and artifacts
13
+ - `create_ml_database()` - Creates Notion databases optimized for ML tracking
14
+ - `dict_to_notion_schema()` - Converts dictionaries to Notion schema
15
+ - `dict_to_notion_props()` - Converts dictionaries to Notion properties
16
+
17
+ #### 2. **Modified: `src/notionhelper/helper.py`**
18
+ - Removed the 4 ML-specific methods listed above
19
+ - **Kept all core Notion API methods**:
20
+ - Database/data source operations
21
+ - Page creation and retrieval
22
+ - File upload and embedding
23
+ - Block management
24
+
25
+ #### 3. **Updated: `src/notionhelper/__init__.py`**
26
+ ```python
27
+ from .helper import NotionHelper
28
+ from .ml_logger import MLNotionHelper
29
+
30
+ __all__ = ["NotionHelper", "MLNotionHelper"]
31
+ ```
32
+
33
+ #### 4. **Updated: `examples/ml_demo.py`**
34
+ - Changed import: `from notionhelper import MLNotionHelper`
35
+ - Changed initialization: `nh = MLNotionHelper(NOTION_TOKEN)`
36
+
37
+ ## Usage
38
+
39
+ ### Simple, Single Instantiation:
40
+ ```python
41
+ from notionhelper import MLNotionHelper
42
+
43
+ # One line - that's it!
44
+ ml_tracker = MLNotionHelper(notion_token)
45
+
46
+ # Use ML methods
47
+ ml_tracker.log_ml_experiment(...)
48
+ ml_tracker.create_ml_database(...)
49
+
50
+ # Also available: all NotionHelper methods
51
+ ml_tracker.get_data_source(...)
52
+ ml_tracker.upload_file(...)
53
+ ```
54
+
55
+ ## Architecture Benefits
56
+
57
+ ✅ **Clean Separation** - ML logic isolated in dedicated module
58
+ ✅ **Single Instantiation** - No extra code needed
59
+ ✅ **Minimal Changes** - Just inherit and move methods
60
+ ✅ **Backward Compatible** - `NotionHelper` still available separately
61
+ ✅ **Extensible** - Easy to add other trackers (e.g., `ImageNotionHelper`)
62
+ ✅ **Elegant** - Inheritance makes intent clear
63
+
64
+ ## File Structure
65
+ ```
66
+ src/notionhelper/
67
+ ├── helper.py # Core Notion API methods
68
+ ├── ml_logger.py # ML experiment tracking (NEW)
69
+ └── __init__.py # Exports both classes
70
+ ```
@@ -31,7 +31,7 @@ from sklearn.metrics import (
31
31
  )
32
32
  from sklearn.preprocessing import StandardScaler
33
33
 
34
- from carecast.notionhelper import NotionHelper
34
+ from notionhelper import MLNotionHelper
35
35
 
36
36
 
37
37
  def train_logistic_regression(
@@ -296,8 +296,8 @@ def main():
296
296
  return
297
297
 
298
298
  try:
299
- nh = NotionHelper(NOTION_TOKEN)
300
- print("✓ NotionHelper initialized successfully")
299
+ nh = MLNotionHelper(NOTION_TOKEN)
300
+ print("✓ MLNotionHelper initialized successfully")
301
301
 
302
302
  # ============================================================
303
303
  # STEP 4A: Create New Database (First time only)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "notionhelper"
3
- version = "0.3.2"
3
+ version = "0.4.1"
4
4
  description = "NotionHelper is a Python library that simplifies interactions with the Notion API, enabling easy management of databases, pages, and files within Notion workspaces."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1,4 @@
1
+ from .helper import NotionHelper
2
+ from .ml_logger import MLNotionHelper
3
+
4
+ __all__ = ["NotionHelper", "MLNotionHelper"]
@@ -174,8 +174,118 @@ class NotionHelper:
174
174
  response = self._make_request("POST", url, payload)
175
175
  return response.get("results", [])
176
176
 
177
- def get_page(self, page_id: str) -> Dict[str, Any]:
178
- """Retrieves the JSON of the page properties and an array of blocks on a Notion page given its page_id."""
177
+ def _blocks_to_markdown(self, blocks: List[Dict[str, Any]]) -> str:
178
+ """Converts Notion blocks to markdown format.
179
+
180
+ Parameters:
181
+ blocks (list): List of block objects from Notion API
182
+
183
+ Returns:
184
+ str: Markdown formatted string
185
+ """
186
+ markdown_lines = []
187
+
188
+ for block in blocks:
189
+ block_type = block.get("type", "")
190
+ block_data = block.get(block_type, {})
191
+
192
+ if block_type == "paragraph":
193
+ text = self._extract_rich_text(block_data.get("rich_text", []))
194
+ if text:
195
+ markdown_lines.append(text)
196
+ markdown_lines.append("")
197
+
198
+ elif block_type == "heading_1":
199
+ text = self._extract_rich_text(block_data.get("rich_text", []))
200
+ markdown_lines.append(f"# {text}")
201
+ markdown_lines.append("")
202
+
203
+ elif block_type == "heading_2":
204
+ text = self._extract_rich_text(block_data.get("rich_text", []))
205
+ markdown_lines.append(f"## {text}")
206
+ markdown_lines.append("")
207
+
208
+ elif block_type == "heading_3":
209
+ text = self._extract_rich_text(block_data.get("rich_text", []))
210
+ markdown_lines.append(f"### {text}")
211
+ markdown_lines.append("")
212
+
213
+ elif block_type == "bulleted_list_item":
214
+ text = self._extract_rich_text(block_data.get("rich_text", []))
215
+ markdown_lines.append(f"- {text}")
216
+
217
+ elif block_type == "numbered_list_item":
218
+ text = self._extract_rich_text(block_data.get("rich_text", []))
219
+ markdown_lines.append(f"1. {text}")
220
+
221
+ elif block_type == "code":
222
+ code_text = self._extract_rich_text(block_data.get("rich_text", []))
223
+ language = block_data.get("language", "")
224
+ markdown_lines.append(f"```{language}")
225
+ markdown_lines.append(code_text)
226
+ markdown_lines.append("```")
227
+ markdown_lines.append("")
228
+
229
+ elif block_type == "image":
230
+ image_data = block_data.get("external", {}) or block_data.get("file", {})
231
+ image_url = image_data.get("url", "")
232
+ if image_url:
233
+ markdown_lines.append(f"![Image]({image_url})")
234
+ markdown_lines.append("")
235
+
236
+ elif block_type == "divider":
237
+ markdown_lines.append("---")
238
+ markdown_lines.append("")
239
+
240
+ elif block_type == "quote":
241
+ text = self._extract_rich_text(block_data.get("rich_text", []))
242
+ markdown_lines.append(f"> {text}")
243
+ markdown_lines.append("")
244
+
245
+ return "\n".join(markdown_lines).strip()
246
+
247
+ def _extract_rich_text(self, rich_text_array: List[Dict[str, Any]]) -> str:
248
+ """Extracts and formats rich text from Notion rich_text array.
249
+
250
+ Parameters:
251
+ rich_text_array (list): Array of rich text objects
252
+
253
+ Returns:
254
+ str: Formatted text with markdown syntax
255
+ """
256
+ result = []
257
+
258
+ for text_obj in rich_text_array:
259
+ content = text_obj.get("text", {}).get("content", "")
260
+ annotations = text_obj.get("annotations", {})
261
+ href = text_obj.get("href", None)
262
+
263
+ # Apply markdown formatting based on annotations
264
+ if annotations.get("bold"):
265
+ content = f"**{content}**"
266
+ if annotations.get("italic"):
267
+ content = f"*{content}*"
268
+ if annotations.get("strikethrough"):
269
+ content = f"~~{content}~~"
270
+ if annotations.get("code"):
271
+ content = f"`{content}`"
272
+ if href:
273
+ content = f"[{content}]({href})"
274
+
275
+ result.append(content)
276
+
277
+ return "".join(result)
278
+
279
+ def get_page(self, page_id: str, return_markdown: bool = False) -> Dict[str, Any]:
280
+ """Retrieves the JSON of the page properties and an array of blocks on a Notion page given its page_id.
281
+
282
+ Parameters:
283
+ page_id (str): The ID of the Notion page
284
+ return_markdown (bool): If True, converts blocks to markdown. If False, returns raw JSON. Defaults to False.
285
+
286
+ Returns:
287
+ dict: Dictionary with 'properties' and 'content' (as JSON or markdown string)
288
+ """
179
289
 
180
290
  # Retrieve the page properties
181
291
  page_url = f"https://api.notion.com/v1/pages/{page_id}"
@@ -187,10 +297,13 @@ class NotionHelper:
187
297
 
188
298
  # Extract all properties as a JSON object
189
299
  properties = page.get("properties", {})
190
- content = [block for block in blocks["results"]]
300
+ content_blocks = [block for block in blocks["results"]]
191
301
 
192
- # Print the full JSON of the properties
193
- print(properties)
302
+ # Convert to markdown if requested
303
+ if return_markdown:
304
+ content = self._blocks_to_markdown(content_blocks)
305
+ else:
306
+ content = content_blocks
194
307
 
195
308
  # Return the properties JSON and blocks content
196
309
  return {"properties": properties, "content": content}
@@ -654,177 +767,3 @@ class NotionHelper:
654
767
  }
655
768
  response = requests.patch(update_url, headers=headers, json=data)
656
769
  return response.json()
657
-
658
- def dict_to_notion_schema(self, data: Dict[str, Any], title_key: str) -> Dict[str, Any]:
659
- """Converts a dictionary into a Notion property schema for database creation.
660
-
661
- Parameters:
662
- data (dict): Dictionary containing sample values to infer types from.
663
- title_key (str): The key that should be used as the title property.
664
-
665
- Returns:
666
- dict: A dictionary defining the Notion property schema.
667
- """
668
- properties = {}
669
-
670
- for key, value in data.items():
671
- # Handle NumPy types
672
- if hasattr(value, "item"):
673
- value = value.item()
674
-
675
- # Debug output to help diagnose type issues
676
- print(f"DEBUG: key='{key}', value={value}, type={type(value).__name__}, isinstance(bool)={isinstance(value, bool)}, isinstance(int)={isinstance(value, int)}")
677
-
678
- if key == title_key:
679
- properties[key] = {"title": {}}
680
- # IMPORTANT: Check for bool BEFORE (int, float) because bool is a subclass of int in Python
681
- elif isinstance(value, bool):
682
- properties[key] = {"checkbox": {}}
683
- print(f" → Assigned as CHECKBOX")
684
- elif isinstance(value, (int, float)):
685
- properties[key] = {"number": {"format": "number"}}
686
- print(f" → Assigned as NUMBER")
687
- else:
688
- properties[key] = {"rich_text": {}}
689
- print(f" → Assigned as RICH_TEXT")
690
-
691
- return properties
692
-
693
- def dict_to_notion_props(self, data: Dict[str, Any], title_key: str) -> Dict[str, Any]:
694
- """Converts a dictionary into Notion property values for page creation.
695
-
696
- Parameters:
697
- data (dict): Dictionary containing the values to convert.
698
- title_key (str): The key that should be used as the title property.
699
-
700
- Returns:
701
- dict: A dictionary defining the Notion property values.
702
- """
703
- notion_props = {}
704
- for key, value in data.items():
705
- # Handle NumPy types
706
- if hasattr(value, "item"):
707
- value = value.item()
708
-
709
- if key == title_key:
710
- ts = datetime.now().strftime("%Y-%m-%d %H:%M")
711
- notion_props[key] = {"title": [{"text": {"content": f"{value} ({ts})"}}]}
712
-
713
- # FIX: Handle Booleans
714
- elif isinstance(value, bool):
715
- # Option A: Map to a Checkbox column in Notion
716
- # notion_props[key] = {"checkbox": value}
717
-
718
- # Option B: Map to a Rich Text column as a string (since you added a rich text field)
719
- notion_props[key] = {"rich_text": [{"text": {"content": str(value)}}]}
720
-
721
- elif isinstance(value, (int, float)):
722
- if pd.isna(value) or np.isinf(value): continue
723
- notion_props[key] = {"number": float(value)}
724
- else:
725
- notion_props[key] = {"rich_text": [{"text": {"content": str(value)}}]}
726
- return notion_props
727
-
728
- def log_ml_experiment(
729
- self,
730
- data_source_id: str,
731
- config: Dict,
732
- metrics: Dict,
733
- plots: List[str] = None,
734
- target_metric: str = "sMAPE", # Re-added these
735
- higher_is_better: bool = False, # to fix the error
736
- file_paths: Optional[List[str]] = None, # Changed to list
737
- file_property_name: str = "Output Files"
738
- ):
739
- """Logs ML experiment and compares metrics with multiple file support."""
740
- improvement_tag = "Standard Run"
741
- new_score = metrics.get(target_metric)
742
-
743
- # 1. Leaderboard Logic (Champions)
744
- if new_score is not None:
745
- try:
746
- df = self.get_data_source_pages_as_dataframe(data_source_id, limit=100)
747
- if not df.empty and target_metric in df.columns:
748
- valid_scores = pd.to_numeric(df[target_metric], errors='coerce').dropna()
749
- if not valid_scores.empty:
750
- current_best = valid_scores.max() if higher_is_better else valid_scores.min()
751
- is_improvement = (new_score > current_best) if higher_is_better else (new_score < current_best)
752
- if is_improvement:
753
- improvement_tag = f"🏆 NEW BEST {target_metric} (Prev: {current_best:.2f})"
754
- else:
755
- diff = abs(new_score - current_best)
756
- improvement_tag = f"No Improvement (+{diff:.2f} {target_metric})"
757
- except Exception as e:
758
- print(f"Leaderboard check skipped: {e}")
759
-
760
- # 2. Prepare Notion Properties
761
- data_for_notion = metrics.copy()
762
- data_for_notion["Run Status"] = improvement_tag
763
- combined_payload = {**config, **data_for_notion}
764
- title_key = list(config.keys())[0]
765
- properties = self.dict_to_notion_props(combined_payload, title_key)
766
-
767
- try:
768
- # 3. Create the row
769
- new_page = self.new_page_to_data_source(data_source_id, properties)
770
- page_id = new_page["id"]
771
-
772
- # 4. Handle Plots (Body)
773
- if plots:
774
- for plot_path in plots:
775
- if os.path.exists(plot_path):
776
- self.one_step_image_embed(page_id, plot_path)
777
-
778
- # 5. Handle Multiple File Uploads (Property)
779
- if file_paths:
780
- file_assets = []
781
- for path in file_paths:
782
- if os.path.exists(path):
783
- print(f"Uploading {path}...")
784
- upload_resp = self.upload_file(path)
785
- file_assets.append({
786
- "type": "file_upload",
787
- "file_upload": {"id": upload_resp["id"]},
788
- "name": os.path.basename(path),
789
- })
790
-
791
- if file_assets:
792
- # Attach all files in one request
793
- update_url = f"https://api.notion.com/v1/pages/{page_id}"
794
- file_payload = {"properties": {file_property_name: {"files": file_assets}}}
795
- self._make_request("PATCH", update_url, file_payload)
796
- print(f"✅ {len(file_assets)} files attached to {file_property_name}")
797
-
798
- return page_id
799
- except Exception as e:
800
- print(f"Log error: {e}")
801
- return None
802
-
803
- def create_ml_database(self, parent_page_id: str, db_title: str, config: Dict, metrics: Dict, file_property_name: str = "Output Files") -> str:
804
- """
805
- Analyzes dicts to create a new Notion Database with the correct schema.
806
- Uses dict_to_notion_schema() for universal type conversion.
807
- """
808
- combined = {**config, **metrics}
809
- title_key = list(config.keys())[0]
810
-
811
- # Use the universal dict_to_notion_schema() method
812
- properties = self.dict_to_notion_schema(combined, title_key)
813
-
814
- # Add 'Run Status' if not already present
815
- if "Run Status" not in properties:
816
- properties["Run Status"] = {"rich_text": {}}
817
-
818
- # Add the Multi-file property
819
- properties[file_property_name] = {"files": {}}
820
-
821
- print(f"Creating database '{db_title}' with {len(properties)} columns...")
822
-
823
- response = self.create_database(
824
- parent_page_id=parent_page_id,
825
- database_title=db_title,
826
- initial_data_source_properties=properties
827
- )
828
-
829
- data_source_id = response.get("initial_data_source", {}).get("id")
830
- return data_source_id if data_source_id else response.get("id")
@@ -0,0 +1,206 @@
1
+ from typing import Optional, Dict, List, Any
2
+ import pandas as pd
3
+ import numpy as np
4
+ import os
5
+ from datetime import datetime
6
+
7
+ from .helper import NotionHelper
8
+
9
+
10
+ class MLNotionHelper(NotionHelper):
11
+ """
12
+ ML experiment tracking helper that extends NotionHelper.
13
+
14
+ Provides specialized methods for logging and tracking machine learning experiments,
15
+ automatically comparing metrics against historical runs and logging results to Notion.
16
+
17
+ Methods
18
+ -------
19
+ log_ml_experiment(data_source_id, config, metrics, plots, target_metric,
20
+ higher_is_better, file_paths, file_property_name):
21
+ Logs an ML experiment run with metrics, plots, and artifacts.
22
+
23
+ create_ml_database(parent_page_id, db_title, config, metrics, file_property_name):
24
+ Creates a new Notion database optimized for ML experiment tracking.
25
+
26
+ dict_to_notion_schema(data, title_key):
27
+ Converts a dictionary into a Notion property schema.
28
+
29
+ dict_to_notion_props(data, title_key):
30
+ Converts a dictionary into Notion property values.
31
+ """
32
+
33
+ def dict_to_notion_schema(self, data: Dict[str, Any], title_key: str) -> Dict[str, Any]:
34
+ """Converts a dictionary into a Notion property schema for database creation.
35
+
36
+ Parameters:
37
+ data (dict): Dictionary containing sample values to infer types from.
38
+ title_key (str): The key that should be used as the title property.
39
+
40
+ Returns:
41
+ dict: A dictionary defining the Notion property schema.
42
+ """
43
+ properties = {}
44
+
45
+ for key, value in data.items():
46
+ # Handle NumPy types
47
+ if hasattr(value, "item"):
48
+ value = value.item()
49
+
50
+ # Debug output to help diagnose type issues
51
+ print(f"DEBUG: key='{key}', value={value}, type={type(value).__name__}, isinstance(bool)={isinstance(value, bool)}, isinstance(int)={isinstance(value, int)}")
52
+
53
+ if key == title_key:
54
+ properties[key] = {"title": {}}
55
+ # IMPORTANT: Check for bool BEFORE (int, float) because bool is a subclass of int in Python
56
+ elif isinstance(value, bool):
57
+ properties[key] = {"checkbox": {}}
58
+ print(f" → Assigned as CHECKBOX")
59
+ elif isinstance(value, (int, float)):
60
+ properties[key] = {"number": {"format": "number"}}
61
+ print(f" → Assigned as NUMBER")
62
+ else:
63
+ properties[key] = {"rich_text": {}}
64
+ print(f" → Assigned as RICH_TEXT")
65
+
66
+ return properties
67
+
68
+ def dict_to_notion_props(self, data: Dict[str, Any], title_key: str) -> Dict[str, Any]:
69
+ """Converts a dictionary into Notion property values for page creation.
70
+
71
+ Parameters:
72
+ data (dict): Dictionary containing the values to convert.
73
+ title_key (str): The key that should be used as the title property.
74
+
75
+ Returns:
76
+ dict: A dictionary defining the Notion property values.
77
+ """
78
+ notion_props = {}
79
+ for key, value in data.items():
80
+ # Handle NumPy types
81
+ if hasattr(value, "item"):
82
+ value = value.item()
83
+
84
+ if key == title_key:
85
+ ts = datetime.now().strftime("%Y-%m-%d %H:%M")
86
+ notion_props[key] = {"title": [{"text": {"content": f"{value} ({ts})"}}]}
87
+
88
+ # FIX: Handle Booleans
89
+ elif isinstance(value, bool):
90
+ # Option A: Map to a Checkbox column in Notion
91
+ # notion_props[key] = {"checkbox": value}
92
+
93
+ # Option B: Map to a Rich Text column as a string (since you added a rich text field)
94
+ notion_props[key] = {"rich_text": [{"text": {"content": str(value)}}]}
95
+
96
+ elif isinstance(value, (int, float)):
97
+ if pd.isna(value) or np.isinf(value):
98
+ continue
99
+ notion_props[key] = {"number": float(value)}
100
+ else:
101
+ notion_props[key] = {"rich_text": [{"text": {"content": str(value)}}]}
102
+ return notion_props
103
+
104
+ def log_ml_experiment(
105
+ self,
106
+ data_source_id: str,
107
+ config: Dict,
108
+ metrics: Dict,
109
+ plots: List[str] = None,
110
+ target_metric: str = "sMAPE",
111
+ higher_is_better: bool = False,
112
+ file_paths: Optional[List[str]] = None,
113
+ file_property_name: str = "Output Files"
114
+ ):
115
+ """Logs ML experiment and compares metrics with multiple file support."""
116
+ improvement_tag = "Standard Run"
117
+ new_score = metrics.get(target_metric)
118
+
119
+ # 1. Leaderboard Logic (Champions)
120
+ if new_score is not None:
121
+ try:
122
+ df = self.get_data_source_pages_as_dataframe(data_source_id, limit=100)
123
+ if not df.empty and target_metric in df.columns:
124
+ valid_scores = pd.to_numeric(df[target_metric], errors='coerce').dropna()
125
+ if not valid_scores.empty:
126
+ current_best = valid_scores.max() if higher_is_better else valid_scores.min()
127
+ is_improvement = (new_score > current_best) if higher_is_better else (new_score < current_best)
128
+ if is_improvement:
129
+ improvement_tag = f"🏆 NEW BEST {target_metric} (Prev: {current_best:.2f})"
130
+ else:
131
+ diff = abs(new_score - current_best)
132
+ improvement_tag = f"No Improvement (+{diff:.2f} {target_metric})"
133
+ except Exception as e:
134
+ print(f"Leaderboard check skipped: {e}")
135
+
136
+ # 2. Prepare Notion Properties
137
+ data_for_notion = metrics.copy()
138
+ data_for_notion["Run Status"] = improvement_tag
139
+ combined_payload = {**config, **data_for_notion}
140
+ title_key = list(config.keys())[0]
141
+ properties = self.dict_to_notion_props(combined_payload, title_key)
142
+
143
+ try:
144
+ # 3. Create the row
145
+ new_page = self.new_page_to_data_source(data_source_id, properties)
146
+ page_id = new_page["id"]
147
+
148
+ # 4. Handle Plots (Body)
149
+ if plots:
150
+ for plot_path in plots:
151
+ if os.path.exists(plot_path):
152
+ self.one_step_image_embed(page_id, plot_path)
153
+
154
+ # 5. Handle Multiple File Uploads (Property)
155
+ if file_paths:
156
+ file_assets = []
157
+ for path in file_paths:
158
+ if os.path.exists(path):
159
+ print(f"Uploading {path}...")
160
+ upload_resp = self.upload_file(path)
161
+ file_assets.append({
162
+ "type": "file_upload",
163
+ "file_upload": {"id": upload_resp["id"]},
164
+ "name": os.path.basename(path),
165
+ })
166
+
167
+ if file_assets:
168
+ # Attach all files in one request
169
+ update_url = f"https://api.notion.com/v1/pages/{page_id}"
170
+ file_payload = {"properties": {file_property_name: {"files": file_assets}}}
171
+ self._make_request("PATCH", update_url, file_payload)
172
+ print(f"✅ {len(file_assets)} files attached to {file_property_name}")
173
+
174
+ return page_id
175
+ except Exception as e:
176
+ print(f"Log error: {e}")
177
+ return None
178
+
179
+ def create_ml_database(self, parent_page_id: str, db_title: str, config: Dict, metrics: Dict, file_property_name: str = "Output Files") -> str:
180
+ """
181
+ Analyzes dicts to create a new Notion Database with the correct schema.
182
+ Uses dict_to_notion_schema() for universal type conversion.
183
+ """
184
+ combined = {**config, **metrics}
185
+ title_key = list(config.keys())[0]
186
+
187
+ # Use the universal dict_to_notion_schema() method
188
+ properties = self.dict_to_notion_schema(combined, title_key)
189
+
190
+ # Add 'Run Status' if not already present
191
+ if "Run Status" not in properties:
192
+ properties["Run Status"] = {"rich_text": {}}
193
+
194
+ # Add the Multi-file property
195
+ properties[file_property_name] = {"files": {}}
196
+
197
+ print(f"Creating database '{db_title}' with {len(properties)} columns...")
198
+
199
+ response = self.create_database(
200
+ parent_page_id=parent_page_id,
201
+ database_title=db_title,
202
+ initial_data_source_properties=properties
203
+ )
204
+
205
+ data_source_id = response.get("initial_data_source", {}).get("id")
206
+ return data_source_id if data_source_id else response.get("id")
@@ -1,3 +0,0 @@
1
- from .helper import NotionHelper
2
-
3
- __all__ = ["NotionHelper"]
File without changes
File without changes
File without changes
File without changes