okb 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ """Plugin discovery and registration via entry_points."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib.metadata import entry_points
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from .base import APISource, FileParser
11
+
12
+
13
+ class PluginRegistry:
14
+ """Registry for file parsers and API sources discovered via entry_points.
15
+
16
+ Plugins are discovered from two entry_point groups:
17
+ - okb.parsers: FileParser implementations
18
+ - okb.sources: APISource implementations
19
+
20
+ Example pyproject.toml for a plugin:
21
+ [project.entry-points."okb.parsers"]
22
+ epub = "okb_epub:EpubParser"
23
+
24
+ [project.entry-points."okb.sources"]
25
+ github = "okb_github:GitHubSource"
26
+ """
27
+
28
+ _parsers: dict[str, list[FileParser]] = {} # ext -> list of parsers
29
+ _sources: dict[str, APISource] = {} # name -> source
30
+ _loaded = False
31
+
32
+ @classmethod
33
+ def load_plugins(cls) -> None:
34
+ """Load all plugins from entry_points. Called automatically on first use."""
35
+ if cls._loaded:
36
+ return
37
+
38
+ # Load file parsers
39
+ parser_eps = entry_points(group="okb.parsers")
40
+ for ep in parser_eps:
41
+ try:
42
+ parser_cls = ep.load()
43
+ parser = parser_cls()
44
+ for ext in parser.extensions:
45
+ ext_lower = ext.lower()
46
+ if ext_lower not in cls._parsers:
47
+ cls._parsers[ext_lower] = []
48
+ cls._parsers[ext_lower].append(parser)
49
+ except Exception as e:
50
+ print(f"Warning: Failed to load parser plugin '{ep.name}': {e}")
51
+
52
+ # Load API sources
53
+ source_eps = entry_points(group="okb.sources")
54
+ for ep in source_eps:
55
+ try:
56
+ source_cls = ep.load()
57
+ source = source_cls()
58
+ cls._sources[source.name] = source
59
+ except Exception as e:
60
+ print(f"Warning: Failed to load source plugin '{ep.name}': {e}")
61
+
62
+ cls._loaded = True
63
+
64
+ @classmethod
65
+ def get_parser_for_file(cls, path: Path) -> FileParser | None:
66
+ """Find a parser that can handle this file.
67
+
68
+ First filters by extension, then calls can_parse() on each candidate.
69
+
70
+ Args:
71
+ path: Path to the file to parse
72
+
73
+ Returns:
74
+ FileParser instance that can handle the file, or None
75
+ """
76
+ cls.load_plugins()
77
+ ext = path.suffix.lower()
78
+ for parser in cls._parsers.get(ext, []):
79
+ if parser.can_parse(path):
80
+ return parser
81
+ return None
82
+
83
+ @classmethod
84
+ def get_source(cls, name: str) -> APISource | None:
85
+ """Get an API source by name.
86
+
87
+ Args:
88
+ name: Source name (e.g., 'github', 'todoist')
89
+
90
+ Returns:
91
+ APISource instance, or None if not found
92
+ """
93
+ cls.load_plugins()
94
+ return cls._sources.get(name)
95
+
96
+ @classmethod
97
+ def list_sources(cls) -> list[str]:
98
+ """List all available API source names.
99
+
100
+ Returns:
101
+ List of source names
102
+ """
103
+ cls.load_plugins()
104
+ return list(cls._sources.keys())
105
+
106
+ @classmethod
107
+ def list_parsers(cls) -> dict[str, list[str]]:
108
+ """List all registered parsers by extension.
109
+
110
+ Returns:
111
+ Dict mapping extension to list of parser source_type names
112
+ """
113
+ cls.load_plugins()
114
+ return {
115
+ ext: [p.source_type for p in parsers] for ext, parsers in cls._parsers.items()
116
+ }
117
+
118
+ @classmethod
119
+ def reset(cls) -> None:
120
+ """Reset the registry. Mainly useful for testing."""
121
+ cls._parsers = {}
122
+ cls._sources = {}
123
+ cls._loaded = False
@@ -0,0 +1,5 @@
1
+ """Built-in API source plugins for OKB."""
2
+
3
+ from okb.plugins.sources.dropbox_paper import DropboxPaperSource
4
+
5
+ __all__ = ["DropboxPaperSource"]
@@ -0,0 +1,188 @@
1
+ """Dropbox Paper API source for syncing Paper documents as markdown."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from datetime import UTC, datetime
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from okb.ingest import Document
11
+ from okb.plugins.base import SyncState
12
+
13
+
14
+ class DropboxPaperSource:
15
+ """API source for Dropbox Paper documents.
16
+
17
+ Syncs Paper documents as markdown for searchable knowledge base entries.
18
+
19
+ Config example:
20
+ plugins:
21
+ sources:
22
+ dropbox-paper:
23
+ enabled: true
24
+ token: ${DROPBOX_TOKEN}
25
+ folders: [/] # Optional: filter to specific folder paths
26
+
27
+ Usage:
28
+ lkb sync run dropbox-paper
29
+ lkb sync run dropbox-paper --full # Ignore incremental state
30
+ """
31
+
32
+ name = "dropbox-paper"
33
+ source_type = "dropbox-paper"
34
+
35
+ def __init__(self) -> None:
36
+ self._client = None
37
+ self._folders: list[str] | None = None
38
+ self._doc_ids: list[str] | None = None
39
+
40
+ def configure(self, config: dict) -> None:
41
+ """Initialize Dropbox client with OAuth token.
42
+
43
+ Args:
44
+ config: Source configuration containing 'token' and optional 'folders' or 'doc_ids'
45
+ """
46
+ import dropbox
47
+
48
+ token = config.get("token")
49
+ if not token:
50
+ raise ValueError("dropbox-paper source requires 'token' in config")
51
+
52
+ self._client = dropbox.Dropbox(token)
53
+ self._folders = config.get("folders")
54
+ self._doc_ids = config.get("doc_ids") # Specific doc IDs from CLI
55
+
56
+ def fetch(self, state: SyncState | None = None) -> tuple[list[Document], SyncState]:
57
+ """Fetch Paper documents from Dropbox.
58
+
59
+ Uses the legacy Paper API to list and download documents as markdown.
60
+ Supports incremental sync via cursor-based pagination.
61
+
62
+ Args:
63
+ state: Previous sync state for incremental updates, or None for full sync
64
+
65
+ Returns:
66
+ Tuple of (list of documents, new sync state)
67
+ """
68
+ from okb.plugins.base import SyncState as SyncStateClass
69
+
70
+ if self._client is None:
71
+ raise RuntimeError("Source not configured. Call configure() first.")
72
+
73
+ documents: list[Document] = []
74
+ cursor = state.cursor if state else None
75
+
76
+ print("Fetching Dropbox Paper documents...", file=sys.stderr)
77
+
78
+ # Use specific doc IDs from CLI, or list all Paper docs
79
+ if self._doc_ids:
80
+ doc_ids = self._doc_ids
81
+ print(f"Syncing {len(doc_ids)} specific document(s)", file=sys.stderr)
82
+ else:
83
+ doc_ids = self._list_paper_docs(cursor)
84
+ print(f"Found {len(doc_ids)} Paper documents", file=sys.stderr)
85
+
86
+ for doc_id in doc_ids:
87
+ try:
88
+ doc = self._fetch_paper_doc(doc_id)
89
+ if doc:
90
+ # Apply folder filter if configured
91
+ if self._folders:
92
+ folder_path = doc.metadata.extra.get("folder_path", "/")
93
+ if not any(folder_path.startswith(f) for f in self._folders):
94
+ continue
95
+ documents.append(doc)
96
+ print(f" Synced: {doc.title}", file=sys.stderr)
97
+ except Exception as e:
98
+ print(f" Error fetching doc {doc_id}: {e}", file=sys.stderr)
99
+
100
+ # Build new sync state
101
+ new_state = SyncStateClass(
102
+ last_sync=datetime.now(UTC),
103
+ cursor=cursor, # Paper API doesn't provide incremental cursors
104
+ )
105
+
106
+ return documents, new_state
107
+
108
+ def _list_paper_docs(self, cursor: str | None = None) -> list[str]:
109
+ """List all Paper document IDs.
110
+
111
+ Args:
112
+ cursor: Pagination cursor (not used by Paper API list)
113
+
114
+ Returns:
115
+ List of Paper document IDs
116
+ """
117
+
118
+ doc_ids = []
119
+
120
+ # Initial request
121
+ result = self._client.paper_docs_list()
122
+ doc_ids.extend(result.doc_ids)
123
+
124
+ # Paginate through all results
125
+ while result.has_more:
126
+ result = self._client.paper_docs_list_continue(result.cursor.value)
127
+ doc_ids.extend(result.doc_ids)
128
+
129
+ return doc_ids
130
+
131
+ def _fetch_paper_doc(self, doc_id: str) -> Document | None:
132
+ """Fetch a single Paper document and convert to Document.
133
+
134
+ Args:
135
+ doc_id: Dropbox Paper document ID
136
+
137
+ Returns:
138
+ Document instance or None if fetch failed
139
+ """
140
+ from dropbox.paper import ExportFormat
141
+
142
+ from okb.ingest import Document, DocumentMetadata
143
+
144
+ # Get document metadata
145
+ try:
146
+ folder_result = self._client.paper_docs_get_folder_info(doc_id)
147
+ folder_path = folder_result.folder_sharing_policy_type.name if folder_result else "/"
148
+ # Try to get actual folder path from folders list
149
+ if folder_result and hasattr(folder_result, "folders") and folder_result.folders:
150
+ folder_path = "/" + "/".join(f.name for f in folder_result.folders)
151
+ else:
152
+ folder_path = "/"
153
+ except Exception:
154
+ folder_path = "/"
155
+
156
+ # Download as markdown
157
+ result, response = self._client.paper_docs_download(
158
+ doc_id, ExportFormat.markdown
159
+ )
160
+
161
+ content = response.content.decode("utf-8")
162
+ if not content.strip():
163
+ return None
164
+
165
+ # Extract title from first heading or filename
166
+ title = result.title or f"Paper Doc {doc_id}"
167
+
168
+ # Parse modification time
169
+ doc_date = None
170
+ if hasattr(result, "server_modified"):
171
+ doc_date = result.server_modified.isoformat()
172
+
173
+ metadata = DocumentMetadata(
174
+ extra={
175
+ "folder_path": folder_path,
176
+ "doc_id": doc_id,
177
+ }
178
+ )
179
+ if doc_date:
180
+ metadata.extra["document_date"] = doc_date
181
+
182
+ return Document(
183
+ source_path=f"dropbox://paper/{doc_id}",
184
+ source_type=self.source_type,
185
+ title=title,
186
+ content=content,
187
+ metadata=metadata,
188
+ )