codebase-cortex 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. codebase_cortex/__init__.py +3 -0
  2. codebase_cortex/agents/__init__.py +0 -0
  3. codebase_cortex/agents/base.py +69 -0
  4. codebase_cortex/agents/code_analyzer.py +122 -0
  5. codebase_cortex/agents/doc_writer.py +356 -0
  6. codebase_cortex/agents/semantic_finder.py +64 -0
  7. codebase_cortex/agents/sprint_reporter.py +152 -0
  8. codebase_cortex/agents/task_creator.py +138 -0
  9. codebase_cortex/auth/__init__.py +0 -0
  10. codebase_cortex/auth/callback_server.py +80 -0
  11. codebase_cortex/auth/oauth.py +173 -0
  12. codebase_cortex/auth/token_store.py +90 -0
  13. codebase_cortex/cli.py +855 -0
  14. codebase_cortex/config.py +150 -0
  15. codebase_cortex/embeddings/__init__.py +0 -0
  16. codebase_cortex/embeddings/clustering.py +140 -0
  17. codebase_cortex/embeddings/indexer.py +208 -0
  18. codebase_cortex/embeddings/store.py +126 -0
  19. codebase_cortex/git/__init__.py +0 -0
  20. codebase_cortex/git/diff_parser.py +185 -0
  21. codebase_cortex/git/github_client.py +46 -0
  22. codebase_cortex/graph.py +111 -0
  23. codebase_cortex/mcp_client.py +94 -0
  24. codebase_cortex/notion/__init__.py +0 -0
  25. codebase_cortex/notion/bootstrap.py +298 -0
  26. codebase_cortex/notion/page_cache.py +107 -0
  27. codebase_cortex/state.py +77 -0
  28. codebase_cortex/utils/__init__.py +0 -0
  29. codebase_cortex/utils/json_parsing.py +59 -0
  30. codebase_cortex/utils/logging.py +62 -0
  31. codebase_cortex/utils/rate_limiter.py +56 -0
  32. codebase_cortex/utils/section_parser.py +139 -0
  33. codebase_cortex-0.1.0.dist-info/METADATA +209 -0
  34. codebase_cortex-0.1.0.dist-info/RECORD +37 -0
  35. codebase_cortex-0.1.0.dist-info/WHEEL +4 -0
  36. codebase_cortex-0.1.0.dist-info/entry_points.txt +3 -0
  37. codebase_cortex-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,298 @@
1
+ """Bootstrap starter Notion pages via MCP on first run."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from mcp import ClientSession
6
+
7
+ from codebase_cortex.config import Settings
8
+ from codebase_cortex.notion.page_cache import PageCache
9
+ from codebase_cortex.utils.logging import get_logger
10
+
11
logger = get_logger()

# Canonical title of the parent hub page; this is also the key under which
# the parent page is stored in (and looked up from) the local page cache.
PARENT_PAGE_TITLE = "Codebase Cortex"
14
+
15
+
16
def normalize_page_id(raw_id: str) -> str:
    """Normalize a Notion page ID to dashed UUID format.

    Notion URLs use dashless IDs, but our cache stores dashed format.
    This ensures consistent lookups.
    """
    compact = raw_id.replace("-", "").lower()
    if len(compact) != 32:
        # Not a bare 32-hex-digit ID; hand the input back untouched.
        return raw_id
    groups = (compact[:8], compact[8:12], compact[12:16], compact[16:20], compact[20:])
    return "-".join(groups)
26
+
27
# Child pages created under the parent hub page on first run.
# "title" is the canonical cache key; "icon" is only prepended to the
# display title shown in Notion; "description" seeds the page body.
STARTER_PAGES = [
    {
        "title": "Architecture Overview",
        "icon": "🏗️",
        "description": "System design, component relationships, and architectural decisions.",
    },
    {
        "title": "API Reference",
        "icon": "📡",
        "description": "Endpoints, schemas, contracts, and integration points.",
    },
    {
        "title": "Sprint Log",
        "icon": "📋",
        "description": "Weekly auto-generated summaries of code changes and documentation updates.",
    },
    {
        "title": "Task Board",
        "icon": "✅",
        "description": "Undocumented areas, documentation debt, and improvement tasks.",
    },
]
49
+
50
+
51
+ def extract_page_id(result) -> str | None:
52
+ """Extract a page ID from an MCP CallToolResult.
53
+
54
+ The response text typically contains markdown with page URLs.
55
+ We look for a UUID pattern which is the page ID.
56
+ """
57
+ import re
58
+
59
+ if result.isError:
60
+ return None
61
+
62
+ if not result.content:
63
+ return None
64
+
65
+ text = result.content[0].text
66
+
67
+ # Look for UUID pattern (with or without dashes)
68
+ uuid_pattern = r"[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}"
69
+ match = re.search(uuid_pattern, text, re.IGNORECASE)
70
+ if match:
71
+ return normalize_page_id(match.group(0))
72
+
73
+ return text
74
+
75
+
76
async def search_page_by_title(session: ClientSession, title: str) -> str | None:
    """Search Notion for a page by title, return page_id if found.

    Args:
        session: Active MCP client session exposing ``notion-search``.
        title: Page title to look for.

    Returns:
        The normalized page ID of the first matching result, or None when
        the search errors, returns nothing, or the title is not in the text.
    """
    import re

    from codebase_cortex.utils.rate_limiter import NotionRateLimiter

    # NOTE(review): a fresh limiter per call starts with a full bucket, so
    # this never actually throttles across calls; consider sharing one
    # NotionRateLimiter at module level.
    rate_limiter = NotionRateLimiter()
    # This is a search request, so drain the stricter search bucket too.
    await rate_limiter.acquire(is_search=True)

    try:
        result = await session.call_tool(
            "notion-search",
            arguments={"query": title},
        )
        if result.isError or not result.content:
            return None

        # The search result text contains page info with IDs; Notion search
        # returns markdown with page URLs/IDs.
        text = result.content[0].text

        # Grab the first UUID-looking token, but only if the title appears
        # anywhere in the results (a weak relevance check).
        uuid_pattern = r"[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}"
        match = re.search(uuid_pattern, text, re.IGNORECASE)
        if match and title.lower() in text.lower():
            return normalize_page_id(match.group(0))

        return None
    except Exception:
        # Best-effort lookup: treat any transport/tool failure as "not found".
        return None
105
+
106
+
107
async def discover_child_pages(settings: Settings) -> int:
    """Discover child pages under the parent Notion page and cache them.

    Fetches the parent page via MCP, extracts child page references
    from the content, and caches any pages not already tracked.

    Args:
        settings: Application settings with page-cache path and MCP config.

    Returns:
        The number of newly discovered pages.
    """
    import re

    from codebase_cortex.mcp_client import notion_mcp_session
    from codebase_cortex.utils.rate_limiter import NotionRateLimiter

    cache = PageCache(cache_path=settings.page_cache_path)
    # The parent hub page is cached under its canonical title.
    parent_page = cache.find_by_title(PARENT_PAGE_TITLE)
    if not parent_page:
        return 0

    rate_limiter = NotionRateLimiter()
    discovered = 0

    try:
        async with notion_mcp_session(settings) as session:
            await rate_limiter.acquire()
            result = await session.call_tool(
                "notion-fetch",
                arguments={"id": parent_page.page_id},
            )

            if result.isError or not result.content:
                return 0

            response_text = result.content[0].text

            # Child pages are referenced inside the <content> section when
            # present; otherwise scan the whole response.
            content_match = re.search(
                r"<content>\s*(.*?)\s*</content>",
                response_text,
                re.DOTALL,
            )
            content = content_match.group(1) if content_match else response_text

            # Every UUID in the content is a potential child page reference.
            uuid_pattern = r"[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}"
            raw_ids = re.findall(uuid_pattern, content, re.IGNORECASE)
            found_ids = {normalize_page_id(pid) for pid in raw_ids}

            # The parent page references its own ID; don't treat it as a child.
            found_ids.discard(parent_page.page_id)

            # Only fetch pages we are not already tracking.
            new_ids = [pid for pid in found_ids if not cache.get(pid)]

            for page_id in new_ids:
                await rate_limiter.acquire()
                try:
                    fetch_result = await session.call_tool(
                        "notion-fetch",
                        arguments={"id": page_id},
                    )
                    if not fetch_result.isError and fetch_result.content:
                        page_text = fetch_result.content[0].text
                        title_match = re.search(
                            r'"title"\s*:\s*"([^"]+)"', page_text
                        )
                        if title_match:
                            title = title_match.group(1)
                            cache.upsert(page_id, title)
                            discovered += 1
                            logger.info(f"Discovered child page: {title}")
                except Exception as exc:
                    # Best-effort: one unfetchable page should not abort the
                    # scan, but leave a trace for debugging.
                    logger.debug(f"Skipping child page {page_id}: {exc}")

    except Exception as e:
        logger.warning(f"Child page discovery failed: {e}")

    return discovered
185
+
186
+
187
async def bootstrap_notion_pages(settings: Settings) -> list[dict]:
    """Create the starter Notion pages via MCP tools.

    Creates a parent "Codebase Cortex" page, then child pages under it.
    Searches for existing pages first to avoid duplicates and seeds the
    page cache with all created/found pages.

    Args:
        settings: Application settings with Notion token path.

    Returns:
        List of page info dicts with page_id and title; empty when the
        parent page cannot be found or created.
    """
    from codebase_cortex.mcp_client import notion_mcp_session
    from codebase_cortex.utils.rate_limiter import NotionRateLimiter

    rate_limiter = NotionRateLimiter()
    cache = PageCache(cache_path=settings.page_cache_path)
    pages: list[dict] = []

    repo_name = settings.repo_path.name

    async with notion_mcp_session(settings) as session:
        parent_id = await _ensure_parent_page(session, cache, rate_limiter, repo_name)
        if not parent_id:
            return []

        for page_info in STARTER_PAGES:
            entry = await _ensure_child_page(
                session, cache, rate_limiter, parent_id, page_info
            )
            if entry:
                pages.append(entry)

    return pages


async def _ensure_parent_page(session, cache, rate_limiter, repo_name: str) -> str | None:
    """Find or create the parent hub page; return its ID, or None on failure.

    NOTE(review): the page is created with the repo name as its title but is
    searched for and cached under PARENT_PAGE_TITLE — confirm this asymmetry
    is intentional.
    """
    parent_id = await search_page_by_title(session, PARENT_PAGE_TITLE)
    if parent_id:
        cache.upsert(parent_id, PARENT_PAGE_TITLE)
        logger.info(f"Found existing parent page: {PARENT_PAGE_TITLE}")
        return parent_id

    await rate_limiter.acquire()
    try:
        result = await session.call_tool(
            "notion-create-pages",
            arguments={
                "pages": [
                    {
                        "properties": {"title": repo_name},
                        "content": (
                            f"# {repo_name}\n\n"
                            f"Auto-generated documentation hub for **{repo_name}**.\n\n"
                            "Managed by [Codebase Cortex](https://github.com/sarupurisailalith/codebase-cortex)."
                        ),
                    }
                ],
            },
        )
        parent_id = extract_page_id(result)
        if parent_id:
            cache.upsert(parent_id, PARENT_PAGE_TITLE)
            logger.info(f"Created parent page: {repo_name}")
            return parent_id
        logger.error("Failed to extract parent page ID from response")
        return None
    except Exception as e:
        logger.error(f"Failed to create parent page: {e}")
        return None


async def _ensure_child_page(
    session, cache, rate_limiter, parent_id: str, page_info: dict
) -> dict | None:
    """Find or create one starter child page under the parent.

    Returns a {"title", "page_id"} dict, or None when creation fails.
    """
    title = page_info["title"]
    display_title = f"{page_info['icon']} {title}"

    # Cheapest source first: local cache, then a Notion search, then creation.
    cached = cache.find_by_title(title)
    if cached:
        logger.info(f"Already exists (cached): {display_title}")
        return {"title": title, "page_id": cached.page_id}

    existing_id = await search_page_by_title(session, title)
    if existing_id:
        cache.upsert(existing_id, title)
        logger.info(f"Found existing: {display_title}")
        return {"title": title, "page_id": existing_id}

    await rate_limiter.acquire()
    try:
        content = (
            f"# {title}\n\n"
            f"{page_info['description']}\n\n"
            "---\n*Auto-generated by Codebase Cortex*"
        )
        result = await session.call_tool(
            "notion-create-pages",
            arguments={
                "parent": {"page_id": parent_id},
                "pages": [
                    {
                        "properties": {"title": display_title},
                        "content": content,
                    }
                ],
            },
        )
        page_id = extract_page_id(result)
        if page_id:
            cache.upsert(page_id, title)
            logger.info(f"Created: {display_title}")
            return {"title": title, "page_id": page_id}
        logger.error(f"Failed to extract page ID for '{title}'")
    except Exception as e:
        logger.error(f"Failed to create page '{title}': {e}")
    return None
@@ -0,0 +1,107 @@
1
+ """Local cache for Notion page metadata with staleness tracking."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import time
8
+ import unicodedata
9
+ from pathlib import Path
10
+ from dataclasses import dataclass, field
11
+
12
+
13
@dataclass
class CachedPage:
    """A cached Notion page entry."""

    page_id: str  # Dashed-UUID Notion page ID (also the dict key in PageCache).
    title: str  # Page title as last seen.
    last_synced: float  # Unix timestamp of the last sync.
    content_hash: str = ""  # Optional hash of the synced content.

    def is_stale(self, max_age: float = 3600.0) -> bool:
        """Check if the cache entry is older than max_age seconds."""
        return (time.time() - self.last_synced) > max_age


@dataclass
class PageCache:
    """In-memory cache of Notion pages, backed by a JSON file."""

    cache_path: Path
    pages: dict[str, CachedPage] = field(default_factory=dict)

    def __post_init__(self) -> None:
        self._load()

    def _load(self) -> None:
        """Load entries from disk; a missing or damaged file yields an empty cache."""
        if not self.cache_path.exists():
            return
        try:
            data = json.loads(self.cache_path.read_text())
            self.pages = {
                pid: CachedPage(**entry) for pid, entry in data.items()
            }
        except (json.JSONDecodeError, TypeError, OSError):
            # A corrupt or schema-incompatible cache file should not take the
            # app down; start fresh and let upserts rebuild it.
            self.pages = {}

    def save(self) -> None:
        """Persist all entries to the JSON file, creating parent dirs as needed."""
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        data = {
            pid: {
                "page_id": p.page_id,
                "title": p.title,
                "last_synced": p.last_synced,
                "content_hash": p.content_hash,
            }
            for pid, p in self.pages.items()
        }
        self.cache_path.write_text(json.dumps(data, indent=2))

    def upsert(self, page_id: str, title: str, content_hash: str = "") -> None:
        """Insert or refresh an entry (stamped with the current time) and save."""
        self.pages[page_id] = CachedPage(
            page_id=page_id,
            title=title,
            last_synced=time.time(),
            content_hash=content_hash,
        )
        self.save()

    def get(self, page_id: str) -> CachedPage | None:
        """Return the entry for page_id, or None if not cached."""
        return self.pages.get(page_id)

    def get_stale(self, max_age: float = 3600.0) -> list[CachedPage]:
        """Return all entries last synced more than max_age seconds ago."""
        return [p for p in self.pages.values() if p.is_stale(max_age)]

    @staticmethod
    def _normalize_title(title: str) -> str:
        """Strip emojis, special characters, and normalize whitespace for comparison."""
        # Remove characters in emoji-related Unicode categories (So = Symbol, other)
        cleaned = "".join(
            ch for ch in title
            if unicodedata.category(ch) not in ("So", "Sk", "Sc", "Sm")
        )
        # Remove non-alphanumeric characters (keep spaces)
        cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", cleaned)
        # Collapse whitespace and strip
        return re.sub(r"\s+", " ", cleaned).strip().lower()

    def find_by_title(self, title: str) -> CachedPage | None:
        """Find a page by title: exact match first, then fuzzy fallback."""
        for page in self.pages.values():
            if page.title == title:
                return page
        return self.find_by_title_fuzzy(title)

    def find_by_title_fuzzy(self, title: str) -> CachedPage | None:
        """Find the best matching page by normalized title comparison."""
        normalized = self._normalize_title(title)
        if not normalized:
            return None
        for page in self.pages.values():
            if self._normalize_title(page.title) == normalized:
                return page
        return None

    def find_all_doc_pages(self) -> list[CachedPage]:
        """Return all cached pages except infrastructure pages."""
        return [
            p for p in self.pages.values()
            if p.title != "Codebase Cortex"
        ]
@@ -0,0 +1,77 @@
1
+ """CortexState — shared state for the LangGraph pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TypedDict
6
+
7
+
8
class FileChange(TypedDict):
    """A single file change extracted from a diff."""

    # Repo-relative path of the changed file.
    path: str
    status: str  # "added" | "modified" | "deleted" | "renamed"
    # Added/removed line counts from the diff stat.
    additions: int
    deletions: int
    # Raw diff text for this file.
    diff: str
16
+
17
+
18
class DocUpdate(TypedDict):
    """A documentation update to apply in Notion."""

    page_id: str | None  # None = create new page
    # Target page title (also the page-cache lookup key).
    title: str
    # Page body to write; presumably markdown — confirm with the writer.
    content: str
    action: str  # "create" | "update"
25
+
26
+
27
class TaskItem(TypedDict):
    """A task/ticket to create in Notion."""

    # Short task title.
    title: str
    # Longer task details.
    description: str
    priority: str  # "high" | "medium" | "low"
33
+
34
+
35
class RelatedDoc(TypedDict, total=False):
    """A semantically related existing document.

    total=False: producers may supply any subset of these keys.
    """

    page_id: str
    title: str
    # Similarity score; scale/range not established here — confirm with producer.
    similarity: float
    content: str  # Code chunk content for LLM context
42
+
43
+
44
class CortexState(TypedDict, total=False):
    """Shared state flowing through the LangGraph pipeline.

    Fields are populated progressively by each agent node; total=False,
    so any key may be absent until the producing node has run.
    """

    # Input / trigger
    trigger: str  # "commit" | "pr" | "schedule" | "manual"
    repo_path: str
    dry_run: bool
    full_scan: bool  # True = analyze entire codebase, not just recent diff

    # Git data
    diff_text: str
    changed_files: list[FileChange]

    # CodeAnalyzer output
    analysis: str

    # SemanticFinder output
    related_docs: list[RelatedDoc]

    # DocWriter output
    doc_updates: list[DocUpdate]

    # TaskCreator output
    tasks_created: list[TaskItem]

    # SprintReporter output
    sprint_summary: str

    # Pipeline metadata
    errors: list[str]
    # MCP tool handles; element type not established here — confirm with the
    # MCP client that populates this.
    mcp_tools: list
File without changes
@@ -0,0 +1,59 @@
1
+ """Robust JSON array parsing from LLM responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+
8
+
9
+ def parse_json_array(raw: str) -> list[dict]:
10
+ """Extract a JSON array from an LLM response, handling common quirks.
11
+
12
+ Handles:
13
+ - Raw JSON arrays
14
+ - JSON wrapped in markdown code blocks (```json ... ```)
15
+ - Trailing commas
16
+ - Text before/after the JSON array
17
+
18
+ Args:
19
+ raw: Raw LLM response text.
20
+
21
+ Returns:
22
+ Parsed list of dicts.
23
+
24
+ Raises:
25
+ ValueError: If no valid JSON array can be extracted.
26
+ """
27
+ # Try direct parse first
28
+ text = raw.strip()
29
+ try:
30
+ result = json.loads(text)
31
+ if isinstance(result, list):
32
+ return result
33
+ except json.JSONDecodeError:
34
+ pass
35
+
36
+ # Extract from markdown code blocks
37
+ code_block_match = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
38
+ if code_block_match:
39
+ try:
40
+ result = json.loads(code_block_match.group(1).strip())
41
+ if isinstance(result, list):
42
+ return result
43
+ except json.JSONDecodeError:
44
+ pass
45
+
46
+ # Find the outermost [ ... ] in the response
47
+ bracket_match = re.search(r"\[.*\]", text, re.DOTALL)
48
+ if bracket_match:
49
+ candidate = bracket_match.group(0)
50
+ # Remove trailing commas before ] (common LLM mistake)
51
+ candidate = re.sub(r",\s*\]", "]", candidate)
52
+ try:
53
+ result = json.loads(candidate)
54
+ if isinstance(result, list):
55
+ return result
56
+ except json.JSONDecodeError:
57
+ pass
58
+
59
+ raise ValueError(f"Could not extract JSON array from LLM response: {text[:200]}")
@@ -0,0 +1,62 @@
1
+ """Rich-based logging for Codebase Cortex."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ from rich.console import Console
9
+ from rich.logging import RichHandler
10
+
11
# Shared Rich console used by the logging handler.
console = Console()

# Module-level flag for verbose/debug mode; set by setup_logging().
_verbose = False
15
+
16
+
17
def setup_logging(level: int = logging.INFO, verbose: bool = False) -> logging.Logger:
    """Configure and return the application logger.

    Installs a Rich console handler on the "cortex" logger, replacing any
    previously installed handlers. When *verbose* is set, the level is
    forced to DEBUG and a file handler writing to .cortex/debug.log is
    added — but only if that directory already exists.
    """
    global _verbose
    _verbose = verbose

    effective_level = logging.DEBUG if verbose else level

    logger = logging.getLogger("cortex")
    logger.setLevel(effective_level)
    # Reinstall handlers from scratch so repeated calls don't duplicate output.
    logger.handlers.clear()

    rich_handler = RichHandler(
        console=console,
        show_path=False,
        markup=True,
    )
    rich_handler.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(rich_handler)

    if verbose:
        cortex_dir = Path.cwd() / ".cortex"
        # Never create the directory here; file logging is opt-in via its
        # pre-existing presence.
        if cortex_dir.exists():
            debug_handler = logging.FileHandler(cortex_dir / "debug.log")
            debug_handler.setLevel(logging.DEBUG)
            debug_handler.setFormatter(
                logging.Formatter("%(asctime)s %(levelname)s %(message)s")
            )
            logger.addHandler(debug_handler)

    return logger
51
+
52
+
53
def get_logger() -> logging.Logger:
    """Return the shared "cortex" logger, configuring it on first use."""
    existing = logging.getLogger("cortex")
    if existing.handlers:
        return existing
    return setup_logging()
59
+
60
+
61
def is_verbose() -> bool:
    """Report whether setup_logging() was last called with verbose=True."""
    return _verbose
@@ -0,0 +1,56 @@
1
+ """Async token bucket rate limiter for Notion MCP API calls."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import time
7
+
8
+
9
+ class TokenBucket:
10
+ """Token bucket rate limiter.
11
+
12
+ Args:
13
+ rate: Number of tokens added per second.
14
+ capacity: Maximum tokens in the bucket.
15
+ """
16
+
17
+ def __init__(self, rate: float, capacity: int) -> None:
18
+ self.rate = rate
19
+ self.capacity = capacity
20
+ self._tokens = float(capacity)
21
+ self._last_refill = time.monotonic()
22
+ self._lock = asyncio.Lock()
23
+
24
+ def _refill(self) -> None:
25
+ now = time.monotonic()
26
+ elapsed = now - self._last_refill
27
+ self._tokens = min(self.capacity, self._tokens + elapsed * self.rate)
28
+ self._last_refill = now
29
+
30
+ async def acquire(self, tokens: int = 1) -> None:
31
+ """Wait until the requested number of tokens are available."""
32
+ async with self._lock:
33
+ while True:
34
+ self._refill()
35
+ if self._tokens >= tokens:
36
+ self._tokens -= tokens
37
+ return
38
+ wait = (tokens - self._tokens) / self.rate
39
+ await asyncio.sleep(wait)
40
+
41
+
42
+ class NotionRateLimiter:
43
+ """Dual token bucket for Notion MCP rate limits.
44
+
45
+ - General: 180 requests/minute (3/sec)
46
+ - Search: 30 requests/minute (0.5/sec)
47
+ """
48
+
49
+ def __init__(self) -> None:
50
+ self.general = TokenBucket(rate=3.0, capacity=180)
51
+ self.search = TokenBucket(rate=0.5, capacity=30)
52
+
53
+ async def acquire(self, is_search: bool = False) -> None:
54
+ await self.general.acquire()
55
+ if is_search:
56
+ await self.search.acquire()