codeer-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeer_cli/kb.py ADDED
@@ -0,0 +1,226 @@
1
+ """Knowledge base CRUD + file upload.
2
+
3
+ KBs are tree-shaped (KnowledgeNode): the root node is the KB itself, children are
4
+ folders or files. All endpoints are scoped under an organization + workspace.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import mimetypes
10
+ from pathlib import Path
11
+ from typing import Any, List, Optional
12
+
13
+ from .client import CodeerClient
14
+
15
+
16
+ # Extensions whose MIME type the system table often gets wrong on macOS/Linux,
17
+ # and what the Codeer upload validator expects. Backend accepts any ``text/*``
18
+ # subtype plus a fixed set of document/image mimetypes — see
19
+ # ``codeer/common/files.py :: validate_uploaded_file``.
20
+ _MIME_OVERRIDES = {
21
+ ".md": "text/markdown",
22
+ ".markdown": "text/markdown",
23
+ ".txt": "text/plain",
24
+ ".csv": "text/csv",
25
+ }
26
+
27
+
28
+ def _guess_mime(filename: str) -> str:
29
+ ext = Path(filename).suffix.lower()
30
+ if ext in _MIME_OVERRIDES:
31
+ return _MIME_OVERRIDES[ext]
32
+ guess, _ = mimetypes.guess_type(filename)
33
+ # Last-resort fallback. Backend rejects application/octet-stream with
34
+ # "Content-Type is missing"-class errors, so prefer text/plain for anything
35
+ # unknown — validator then only passes if the extension is also allowed.
36
+ return guess or "application/octet-stream"
37
+
38
+
39
+ def _base(organization_id: str, workspace_id: str) -> str:
40
+ return "/external/knowledge-bases"
41
+
42
+
43
def list_nodes(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    parent_id: Optional[str] = None,
) -> list[dict]:
    """Fetch the children of a node via ``GET .../nodes``.

    When ``parent_id`` is falsy (``None`` or an empty string) no query
    parameters are sent, which lists the top-level KBs. Otherwise it is
    passed as the ``parent_id`` query parameter.

    Returns whatever ``client.get`` returns for the request.
    """
    endpoint = f"{_base(organization_id, workspace_id)}/nodes"
    if parent_id:
        query = {"parent_id": parent_id}
    else:
        query = None
    return client.get(endpoint, params=query)
53
+
54
+
55
def create_kb(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    name: str,
    description: Optional[str] = None,
) -> dict:
    """Create a new top-level knowledge base (a folder without a parent).

    ``POST /nodes`` carries no ``type`` field; the server tells a KB root from
    a nested folder by whether ``parent_id`` is present. This function never
    sends ``parent_id``. For folders under an existing KB use
    :func:`create_folder`.

    ``description`` is added to the request body only when it is not ``None``.
    """
    payload: dict[str, Any] = {"name": name}
    if description is not None:
        payload.update(description=description)
    endpoint = f"{_base(organization_id, workspace_id)}/nodes"
    return client.post(endpoint, json=payload)
73
+
74
+
75
def create_folder(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    parent_id: str,
    name: str,
    description: Optional[str] = None,
) -> dict:
    """Create a folder inside a KB. **``parent_id`` should be the KB root id.**

    A KB holds exactly one level of folders: the KB root contains files or
    folders, and folders contain files. Folder-inside-folder nesting is a
    UI-level non-feature, so do not pass a folder's id as ``parent_id``: the
    resulting structure will be invisible in the file manager.

    When flattening source material for a KB, run the ``kb-indexing`` skill
    first to collapse deep trees into single-level folder names encoded in
    the filename (e.g. ``products/a.md``). See that skill's docs.

    ``description`` is added to the request body only when it is not ``None``.
    """
    payload: dict[str, Any] = {"parent_id": parent_id, "name": name}
    if description is not None:
        payload.update(description=description)
    endpoint = f"{_base(organization_id, workspace_id)}/nodes"
    return client.post(endpoint, json=payload)
99
+
100
+
101
def create_node(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    name: str,
    parent_id: Optional[str] = None,
    description: Optional[str] = None,
) -> dict:
    """Generic node creation via ``POST .../nodes``.

    With ``parent_id=None`` a KB root is created; otherwise a folder under
    that parent. Intended for callers walking a tree that only have the
    parent id to decide with. New code should prefer :func:`create_kb` or
    :func:`create_folder`.

    ``parent_id`` and ``description`` are each included in the request body
    only when they are not ``None``.
    """
    payload: dict[str, Any] = {"name": name}
    optional_fields = (("parent_id", parent_id), ("description", description))
    for key, value in optional_fields:
        if value is not None:
            payload[key] = value
    endpoint = f"{_base(organization_id, workspace_id)}/nodes"
    return client.post(endpoint, json=payload)
121
+
122
+
123
def update_node(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    node_id: str,
    name: Optional[str] = None,
) -> dict:
    """Send ``PATCH .../nodes/{node_id}`` to update a node.

    ``name`` is the only field that can be set. When it is ``None`` the
    request is still issued, with an empty JSON object as its body.
    """
    payload: dict[str, Any] = {} if name is None else {"name": name}
    target = f"{_base(organization_id, workspace_id)}/nodes/{node_id}"
    return client.patch(target, json=payload)
135
+
136
+
137
def upload_file(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    kb_id: str,
    file_path: str | Path,
    parent_id: str,
) -> dict:
    """Upload one file by delegating to :func:`upload_files`.

    ``parent_id`` is mandatory. The KB root counts as a folder, so pass the
    root's id to place the file at the top level.
    """
    single_item = [file_path]
    return upload_files(
        client,
        organization_id=organization_id,
        workspace_id=workspace_id,
        kb_id=kb_id,
        file_paths=single_item,
        parent_id=parent_id,
    )
159
+
160
+
161
def upload_files(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    kb_id: str,
    file_paths: List[str | Path],
    parent_id: str,
) -> dict:
    """Upload one or more files into a KB folder in a single request.

    Backend quirks baked in:

    - ``parent_id`` is sent as a form field next to the multipart ``files``
      parts.
    - Each file must include an explicit ``Content-Type``; httpx's default
      ``application/octet-stream`` gets rejected by the KB validator.
      :func:`_guess_mime` supplies a sensible default per extension.
    - The response is ``{"nodes": [{"node_id": "...", "status": "PENDING", ...}]}``.
    - Upload kicks off async indexing. Poll :func:`file_status` on the returned
      ``node_id`` values until each is ``READY``/``FAILED``.

    NOTE(review): this endpoint was previously described as taking a single
    JSON-encoded form field named ``form`` (Django Ninja's default for a
    ``Schema``-typed form param). The code sends ``parent_id`` as a plain
    form field instead - confirm which shape the backend expects.

    Args:
        client: API client used to issue the POST.
        organization_id: Forwarded to :func:`_base`.
        workspace_id: Forwarded to :func:`_base`.
        kb_id: Accepted for interface compatibility; not sent in the request.
        file_paths: Paths of the files to upload; must be non-empty.
        parent_id: Id of the destination folder (the KB root id for top level).

    Returns:
        Whatever ``client.post`` returns for the upload request.

    Raises:
        ValueError: If ``parent_id`` or ``file_paths`` is empty.
        FileNotFoundError: If any path is not an existing regular file.
    """
    from contextlib import ExitStack

    if not parent_id:
        raise ValueError("parent_id is required (use the KB root id to upload at top level)")
    if not file_paths:
        raise ValueError("file_paths must contain at least one path")

    # Validate every path up front so nothing is opened or sent if one is bad.
    paths = [Path(p) for p in file_paths]
    for p in paths:
        if not p.is_file():
            raise FileNotFoundError(p)

    url = f"{_base(organization_id, workspace_id)}/files:upload"
    # ExitStack closes every handle opened so far even when a later open()
    # raises (e.g. permission denied, or a file removed after the check above).
    with ExitStack() as stack:
        files = [
            ("files", (p.name, stack.enter_context(p.open("rb")), _guess_mime(p.name)))
            for p in paths
        ]
        data = {"parent_id": parent_id}
        return client.post(url, files=files, data=data)
202
+
203
+
204
def file_status(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    node_ids: List[str],
) -> list[dict]:
    """Query the indexing status of several KB file nodes in one call.

    Posts ``{"node_ids": node_ids}`` as JSON to ``.../files:status`` and
    returns whatever ``client.post`` returns.
    """
    endpoint = f"{_base(organization_id, workspace_id)}/files:status"
    payload = {"node_ids": node_ids}
    return client.post(endpoint, json=payload)
216
+
217
+
218
def read_file_content(
    client: CodeerClient,
    *,
    organization_id: str,
    workspace_id: str,
    kb_id: str,
    node_id: str,
) -> dict:
    """Fetch the content of a KB file node via ``GET .../files/{node_id}/content``.

    ``kb_id`` is accepted but does not appear in the request; the file is
    addressed by ``node_id`` alone.
    """
    endpoint = f"{_base(organization_id, workspace_id)}/files/{node_id}/content"
    return client.get(endpoint)