mikoshi 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mikoshi/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ __all__ = ["__version__"]
2
+
3
+ __version__ = "0.1.9"
mikoshi/auth.py ADDED
@@ -0,0 +1,265 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import secrets
8
+ import webbrowser
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+ from urllib.parse import urlencode
14
+
15
+ import httpx
16
+
17
+ from mikoshi.entitlements import DEFAULT_FEATURES, DEFAULT_PLAN
18
+
19
+
20
+ AUTH_FILENAME = "auth.json"
21
+ CONFIG_FILENAME = "config.json"
22
+ DEFAULT_API_BASE_URL = "https://neet.gg"
23
+
24
+
25
class AuthError(RuntimeError):
    """Error raised by the authentication helpers when a login step fails."""
27
+
28
+
29
@dataclass(frozen=True)
class AuthState:
    """Immutable snapshot of the credentials stored after a login."""

    access_token: str
    expires_at: str
    plan: str
    features: list[str]

    def to_dict(self) -> dict[str, Any]:
        """Return the state as a plain dict (the feature list is copied)."""
        serialised: dict[str, Any] = {}
        serialised["access_token"] = self.access_token
        serialised["expires_at"] = self.expires_at
        serialised["plan"] = self.plan
        serialised["features"] = list(self.features)
        return serialised

    @staticmethod
    def from_dict(data: dict[str, Any]) -> "AuthState":
        """Build an ``AuthState`` from a decoded mapping.

        Every field is coerced with ``str``. A missing or empty ``features``
        entry becomes an empty list, a bare string becomes a one-item list,
        and a missing ``plan`` falls back to ``DEFAULT_PLAN``.
        """
        raw_features = data.get("features")
        if not raw_features:
            raw_features = []
        elif isinstance(raw_features, str):
            raw_features = [raw_features]

        token = str(data.get("access_token", ""))
        expiry = str(data.get("expires_at", ""))
        plan_name = str(data.get("plan", DEFAULT_PLAN))
        return AuthState(
            access_token=token,
            expires_at=expiry,
            plan=plan_name,
            features=[str(entry) for entry in raw_features],
        )
55
+
56
+
57
@dataclass(frozen=True)
class BrokerConfig:
    """Immutable holder for the API base URL used by the login flow."""

    api_base_url: str
60
+
61
+
62
+ def _index_root() -> Path:
63
+ return Path(os.getenv("MIKOSHI_INDEX_ROOT", "~/.mikoshi")).expanduser()
64
+
65
+
66
def auth_path() -> Path:
    """Return the location of the stored auth state inside the index root."""
    return _index_root().joinpath(AUTH_FILENAME)
68
+
69
+
70
def config_path() -> Path:
    """Return the location of the broker config file inside the index root."""
    return _index_root().joinpath(CONFIG_FILENAME)
72
+
73
+
74
def load_broker_config() -> BrokerConfig:
    """Load the broker configuration, falling back to the default URL.

    Returns:
        A ``BrokerConfig`` whose ``api_base_url`` is ``DEFAULT_API_BASE_URL``
        when the config file is missing, unreadable, not valid JSON, not a
        JSON object, or does not hold a non-empty string under
        ``"api_base_url"``. Otherwise the stored URL is returned with
        surrounding whitespace and trailing slashes removed.
    """
    fallback = BrokerConfig(api_base_url=DEFAULT_API_BASE_URL)
    try:
        # Reading directly (instead of exists() + read) avoids a race with a
        # concurrent delete; FileNotFoundError is an OSError.
        data = json.loads(config_path().read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return fallback
    if not isinstance(data, dict):
        return fallback

    raw_url = data.get("api_base_url")
    # A JSON null or number must not be stringified ("None", "1") and then
    # used as a host; treat anything but a string as "not configured".
    if not isinstance(raw_url, str):
        return fallback
    api_base_url = raw_url.strip().rstrip("/")
    return BrokerConfig(api_base_url=api_base_url or DEFAULT_API_BASE_URL)
88
+
89
+
90
def save_broker_config(api_base_url: str) -> None:
    """Persist the API base URL to the broker config file.

    The URL is stripped of whitespace and trailing slashes; an empty result
    is replaced by ``DEFAULT_API_BASE_URL``. The JSON is written to a
    sibling ``.json.tmp`` file opened with mode ``0o600`` and then moved
    over the real file, which is chmod-ed to ``0o600`` afterwards.
    """
    normalized = api_base_url.strip().rstrip("/")
    if not normalized:
        normalized = DEFAULT_API_BASE_URL

    target = config_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    body = json.dumps({"api_base_url": normalized}, indent=2, sort_keys=True)

    scratch = target.with_suffix(".json.tmp")
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    descriptor = os.open(scratch, open_flags, 0o600)
    with os.fdopen(descriptor, "w", encoding="utf-8") as handle:
        handle.write(body)
        handle.write("\n")

    os.replace(scratch, target)
    os.chmod(target, 0o600)
102
+
103
+
104
def load_auth_state() -> AuthState | None:
    """Read the stored auth state, or return ``None`` if it is unusable.

    ``None`` is returned when the auth file does not exist, cannot be read
    or parsed as JSON, or does not contain a JSON object.
    """
    location = auth_path()
    if not location.exists():
        return None
    try:
        raw = location.read_text(encoding="utf-8")
        data = json.loads(raw)
    except Exception:
        return None
    return AuthState.from_dict(data) if isinstance(data, dict) else None
115
+
116
+
117
def save_auth_state(state: AuthState) -> None:
    """Persist ``state`` (including the access token) to the auth file.

    The JSON is written to a sibling ``.json.tmp`` file and then moved over
    the real file so readers never see a partial write. Both files are
    restricted to mode ``0o600``.

    Raises:
        OSError: if the directory or file cannot be written. The temporary
            file is removed before the error propagates so the token is not
            left behind on disk.
    """
    path = auth_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state.to_dict(), indent=2, sort_keys=True)
    temp_path = path.with_suffix(".json.tmp")
    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            # The mode passed to os.open only applies when the file is
            # created; a stale temp file could carry looser permissions, so
            # tighten them before the token is written.
            os.chmod(temp_path, 0o600)
            handle.write(payload)
            handle.write("\n")
        os.replace(temp_path, path)
    except BaseException:
        # Do not leave a token-bearing temp file around after a failure.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
    os.chmod(path, 0o600)
128
+
129
+
130
def clear_auth_state() -> None:
    """Delete the stored auth state; do nothing if it is already absent."""
    # missing_ok avoids the exists()/unlink() race when another process
    # removes the file between the check and the delete.
    auth_path().unlink(missing_ok=True)
134
+
135
+
136
+ def _parse_expires_at(value: str) -> datetime | None:
137
+ if not value:
138
+ return None
139
+ try:
140
+ if value.endswith("Z"):
141
+ value = value[:-1] + "+00:00"
142
+ return datetime.fromisoformat(value)
143
+ except Exception:
144
+ return None
145
+
146
+
147
def is_expired(state: AuthState) -> bool:
    """Return ``True`` when the state's expiry is unparseable or has passed."""
    deadline = _parse_expires_at(state.expires_at)
    if deadline:
        return deadline <= datetime.now(timezone.utc)
    return True
152
+
153
+
154
def email_from_token(access_token: str) -> str | None:
    """Extract an e-mail-like claim from the payload segment of a token.

    The second dot-separated segment is base64url-decoded and parsed as
    JSON; the first truthy value among ``email``, ``user_email`` and
    ``preferred_username`` is returned as a string. Any failure (too few
    segments, bad encoding, bad JSON, unexpected shape) yields ``None``.
    The token's signature is not checked here.
    """
    claim_names = ("email", "user_email", "preferred_username")
    try:
        segments = access_token.split(".")
        if len(segments) < 2:
            return None
        body = segments[1]
        # Restore the base64 padding that is stripped from token segments.
        padded = body + "=" * (-len(body) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("utf-8"))
        claims = json.loads(raw.decode("utf-8"))
        values = (claims.get(name) for name in claim_names)
        return next((str(found) for found in values if found), None)
    except Exception:
        return None
169
+
170
+
171
+ def _generate_state() -> str:
172
+ return secrets.token_urlsafe(32).rstrip("=")
173
+
174
+
175
+ def _generate_code_verifier() -> str:
176
+ return secrets.token_urlsafe(64).rstrip("=")
177
+
178
+
179
+ def _code_challenge(code_verifier: str) -> str:
180
+ digest = hashlib.sha256(code_verifier.encode("utf-8")).digest()
181
+ return base64.urlsafe_b64encode(digest).decode("utf-8").rstrip("=")
182
+
183
+
184
+ def _login_url(api_base_url: str, state: str, challenge: str) -> str:
185
+ query = {
186
+ "response_type": "code",
187
+ "client_id": "mikoshi",
188
+ "code_challenge": challenge,
189
+ "code_challenge_method": "S256",
190
+ "state": state,
191
+ }
192
+ return f"{api_base_url.rstrip('/')}/login?{urlencode(query)}"
193
+
194
+
195
+ def _parse_paste_payload(text: str) -> tuple[str, str]:
196
+ try:
197
+ data = json.loads(text)
198
+ except Exception as exc:
199
+ raise AuthError("Invalid JSON response.") from exc
200
+ if not isinstance(data, dict):
201
+ raise AuthError("Invalid JSON response.")
202
+ code = str(data.get("code", "")).strip()
203
+ state = str(data.get("state", "")).strip()
204
+ if not code or not state:
205
+ raise AuthError("Invalid JSON response.")
206
+ return code, state
207
+
208
+
209
+ def _ensure_state_match(expected: str, actual: str) -> None:
210
+ if expected != actual:
211
+ raise AuthError("State mismatch. Please retry login.")
212
+
213
+
214
def _exchange_code(
    api_base_url: str, code: str, code_verifier: str, state: str
) -> dict[str, Any]:
    """Exchange the pasted authorization code for token data.

    POSTs ``code``, ``code_verifier`` and ``state`` as JSON to
    ``<api_base_url>/cli/exchange`` with a 10 second timeout.

    Returns:
        The decoded JSON object from the response body.

    Raises:
        AuthError: if the request cannot be sent, the server answers with a
            status of 400 or above, or the body is not a JSON object.
    """
    payload = {"code": code, "code_verifier": code_verifier, "state": state}
    url = f"{api_base_url.rstrip('/')}/cli/exchange"
    try:
        with httpx.Client(timeout=10.0) as client:
            response = client.post(url, json=payload)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # Connection failures, timeouts and bad URLs would otherwise escape
        # as httpx exceptions that callers of the login flow do not catch.
        raise AuthError("Login failed. Please try again.") from exc
    if response.status_code >= 400:
        raise AuthError("Login failed. Please try again.")
    try:
        data = response.json()
    except ValueError as exc:
        # A non-JSON body (e.g. an HTML error page) raises JSONDecodeError.
        raise AuthError("Login failed. Invalid response.") from exc
    if not isinstance(data, dict):
        raise AuthError("Login failed. Invalid response.")
    return data
227
+
228
+
229
def login() -> str:
    """Run the interactive browser login and persist the resulting state.

    Opens the login page in a browser, asks the user to paste the JSON
    response, verifies the returned state, exchanges the code for a token
    and saves it with ``save_auth_state``.

    Returns:
        The e-mail address reported by the exchange endpoint.

    Raises:
        AuthError: if the browser cannot be opened, the pasted payload is
            invalid, the state does not match, or the exchange response
            lacks ``access_token``, ``expires_at`` or ``email``.
    """
    api_base_url = load_broker_config().api_base_url
    state = _generate_state()
    code_verifier = _generate_code_verifier()
    login_url = _login_url(api_base_url, state, _code_challenge(code_verifier))

    print("🔐 Starting authentication...")
    print("🌐 Opening authentication page in your browser...")
    if not webbrowser.open(login_url, new=1, autoraise=True):
        raise AuthError("Unable to open authentication page.")

    pasted = input("Paste the JSON response here: ").strip()
    code, returned_state = _parse_paste_payload(pasted)
    _ensure_state_match(state, returned_state)

    data = _exchange_code(api_base_url, code, code_verifier, returned_state)
    required = {
        key: str(data.get(key, "")).strip()
        for key in ("access_token", "expires_at", "email")
    }
    if not all(required.values()):
        raise AuthError("Login failed. Invalid response.")

    plan = str(data.get("plan") or DEFAULT_PLAN)
    features = data.get("features") or list(DEFAULT_FEATURES)
    if isinstance(features, str):
        features = [features]

    save_auth_state(
        AuthState(
            access_token=required["access_token"],
            expires_at=required["expires_at"],
            plan=plan,
            features=[str(entry) for entry in features],
        )
    )
    return required["email"]
mikoshi/chunking.py ADDED
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class ChunkSpan:
    """A run of consecutive lines; line numbers are 1-based and inclusive."""

    start_line: int
    end_line: int
    text: str


def chunk_text(text: str, max_lines: int, overlap: int) -> list[ChunkSpan]:
    """Split ``text`` into windows of at most ``max_lines`` lines.

    Consecutive windows share ``overlap`` lines. Lines are obtained with
    ``str.splitlines`` and re-joined with ``"\\n"``.

    Raises:
        ValueError: if ``max_lines`` is not positive, ``overlap`` is
            negative, or ``overlap`` is not smaller than ``max_lines``.
    """
    if max_lines <= 0:
        raise ValueError("max_lines must be > 0")
    if overlap < 0:
        raise ValueError("overlap must be >= 0")
    if overlap >= max_lines:
        raise ValueError("overlap must be smaller than max_lines")

    lines = text.splitlines()
    total = len(lines)
    stride = max_lines - overlap

    spans: list[ChunkSpan] = []
    for offset in range(0, total, stride):
        stop = min(offset + max_lines, total)
        spans.append(
            ChunkSpan(
                start_line=offset + 1,
                end_line=stop,
                text="\n".join(lines[offset:stop]),
            )
        )
        # Once a window reaches the last line, further windows would only
        # repeat its tail.
        if stop == total:
            break
    return spans
mikoshi/cli.py ADDED
@@ -0,0 +1,295 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from mikoshi.config import ConfigError, configure_external_libs, load_config
10
+ from mikoshi.auth import (
11
+ AuthError,
12
+ DEFAULT_API_BASE_URL,
13
+ clear_auth_state,
14
+ email_from_token,
15
+ is_expired,
16
+ load_auth_state,
17
+ login,
18
+ save_broker_config,
19
+ )
20
+ from mikoshi.indexing.index_store import IndexStore
21
+ from mikoshi.utils.types import SearchResult
22
+
23
+
24
+ def _format_index_root(path: Path) -> str:
25
+ try:
26
+ home = Path.home().resolve()
27
+ resolved = path.expanduser().resolve()
28
+ if resolved == home:
29
+ return "~"
30
+ if str(resolved).startswith(str(home) + "/"):
31
+ return str(resolved).replace(str(home), "~", 1)
32
+ return str(resolved)
33
+ except Exception:
34
+ return str(path)
35
+
36
+
37
def _search(repo_path: str, query: str, k: int) -> list[SearchResult]:
    """Delegate to ``search_repo``; the retrieval module is imported on call."""
    from mikoshi.retrieval.hybrid import search_repo

    hits = search_repo(repo_path, query, k)
    return hits
41
+
42
+
43
def cmd_index(args: argparse.Namespace) -> int:
    """Index the repository at ``args.path`` and print a JSON summary.

    Returns 0 on success, or 2 after reporting a ``ConfigError`` on stderr.
    """
    try:
        from mikoshi.indexing.indexer import index_repo

        result = index_repo(args.path)
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        return 2

    summary = {
        "repo_id": result.repo_id,
        "chunks_indexed": result.chunks_indexed,
        "took_ms": result.took_ms,
    }
    print(json.dumps(summary, indent=2))
    return 0
62
+
63
+
64
def cmd_search(args: argparse.Namespace) -> int:
    """Search the repository at ``args.path`` and print each hit.

    Returns 0 on success, 2 after a ``ConfigError`` and 1 after any other
    ``RuntimeError``; both errors are reported on stderr.
    """
    try:
        hits = _search(args.path, args.query, args.k)
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        return 2
    except RuntimeError as exc:
        print(str(exc), file=sys.stderr)
        return 1

    for hit in hits:
        header = f"{hit.relpath}:{hit.start_line}-{hit.end_line} ({hit.score:.3f})"
        print(header)
        print(hit.snippet)
        print()
    return 0
79
+
80
+
81
def cmd_doctor(args: argparse.Namespace) -> int:
    """Print a checklist of the local setup and return an exit status.

    Checks the Python version (3.11+), the loadable config, whether the
    local embedding model is already in the Hugging Face cache and, when
    ``args.path`` is given, whether that repository has index metadata.

    Returns 1 when a check fails (or immediately when the config cannot be
    loaded or ``huggingface_hub`` cannot be imported), otherwise 0.
    """
    status = 0

    major, minor = sys.version_info[:2]
    version_label = f"{major}.{minor}.x"
    if (major, minor) < (3, 11):
        print(f"❌ Python: {version_label} (requires 3.11+)")
        status = 1
    else:
        print(f"✅ Python: {version_label}")

    try:
        config = load_config()
    except ConfigError as exc:
        print(f"❌ Config: {exc}")
        return 1

    print(f"✅ Mikoshi index root: {_format_index_root(config.index_root)}")
    print(
        f"✅ Embeddings: provider={config.embeddings.provider} "
        f"model={config.embeddings.model}"
    )

    uses_local_model = config.embeddings.provider == "local"
    model_cached = True
    if uses_local_model:
        try:
            from huggingface_hub import snapshot_download
        except Exception:
            print("❌ Dependencies: huggingface_hub missing")
            return 1
        try:
            # local_files_only=True is passed so this only consults the cache.
            snapshot_download(
                config.embeddings.model,
                local_files_only=True,
            )
        except Exception:
            model_cached = False

    offline_flag = os.getenv("MIKOSHI_OFFLINE", "").strip().lower()
    offline = offline_flag in {"1", "true", "yes", "on"}
    if not uses_local_model:
        print("✅ Model cached: yes")
    elif offline and not model_cached:
        print("❌ Model cached: no (offline)")
        status = 1
    else:
        print(f"✅ Model cached: {'yes' if model_cached else 'no'}")

    if args.path:
        repo_root = Path(args.path).expanduser().resolve()
        meta = IndexStore(repo_root, config.index_root).load_meta()
        if not meta:
            print("✅ Repo indexed: no")
        else:
            print(
                "✅ Repo indexed: yes "
                f"(chunks={meta.chunks}, last_index_time={meta.updated_at})"
            )

    return status
148
+
149
+
150
def cmd_login(args: argparse.Namespace) -> int:
    """Run the login flow, asking for confirmation if a session is active.

    Returns 0 on success or when the user declines to re-authenticate, and
    2 after reporting an ``AuthError`` on stderr.
    """
    existing = load_auth_state()
    if existing and not is_expired(existing):
        reply = input(
            "⚠️ You are already logged in. Re-authenticating will replace your current session. Continue? (y/N):"
        )
        if reply.strip().lower() not in {"y", "yes"}:
            return 0

    try:
        email = login()
    except AuthError as exc:
        print(str(exc), file=sys.stderr)
        return 2

    print(f"✅ Logged in as {email}")
    return 0
165
+
166
+
167
def cmd_logout(args: argparse.Namespace) -> int:
    """Remove the stored auth state, confirm on stdout and return 0."""
    clear_auth_state()
    message = "✅ Logged out"
    print(message)
    return 0
171
+
172
+
173
def cmd_whoami(args: argparse.Namespace) -> int:
    """Print the signed-in e-mail and plan.

    Returns 1 when there is no stored state or it has expired, otherwise 0.
    """
    state = load_auth_state()
    if state and not is_expired(state):
        email = email_from_token(state.access_token) or "unknown"
        if state.plan:
            plan = state.plan.title()
        else:
            plan = "Free"
        print(f"✅ {email} ({plan})")
        return 0

    print("🔒 Not signed in")
    return 1
182
+
183
+
184
def cmd_auth_configure(args: argparse.Namespace) -> int:
    """Prompt for the API base URL and store it in the broker config.

    An empty answer selects ``DEFAULT_API_BASE_URL``.

    Returns 0 on success, or 2 after reporting on stderr that the config
    could not be saved.
    """
    prompt = f"API base URL [{DEFAULT_API_BASE_URL}]: "
    api_base_url = input(prompt).strip() or DEFAULT_API_BASE_URL
    try:
        save_broker_config(api_base_url)
    except (AuthError, OSError) as exc:
        # save_broker_config only performs file I/O, so the realistic
        # failure here is an OSError (read-only or missing directory).
        print(str(exc), file=sys.stderr)
        return 2
    print("✅ Auth configured")
    return 0
194
+
195
+
196
def cmd_status(args: argparse.Namespace) -> int:
    """Print index metadata for the repository at ``args.path`` as JSON.

    The report has the keys ``indexed``, ``chunks``, ``last_index_time``
    and ``model``; an unindexed repository reports ``False``/``0``/``null``.

    Returns 0 after printing the report, or 2 after reporting a
    ``ConfigError`` on stderr, matching ``cmd_index`` and ``cmd_search``.
    """
    try:
        config = load_config()
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        return 2

    repo_root = Path(args.path).expanduser().resolve()
    meta = IndexStore(repo_root, config.index_root).load_meta()
    if meta:
        report = {
            "indexed": True,
            "chunks": meta.chunks,
            "last_index_time": meta.updated_at,
            "model": meta.model,
        }
    else:
        report = {
            "indexed": False,
            "chunks": 0,
            "last_index_time": None,
            "model": None,
        }
    print(json.dumps(report, indent=2))
    return 0
226
+
227
+
228
def cmd_clear(args: argparse.Namespace) -> int:
    """Clear the stored index data for the repository at ``args.path``.

    Returns 0 after printing ``{"ok": true}``, or 2 after reporting a
    ``ConfigError`` on stderr, matching ``cmd_index`` and ``cmd_search``.
    """
    try:
        config = load_config()
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        return 2

    repo_root = Path(args.path).expanduser().resolve()
    store = IndexStore(repo_root, config.index_root)
    store.clear()
    print(json.dumps({"ok": True}))
    return 0
235
+
236
+
237
def build_parser() -> argparse.ArgumentParser:
    """Create the ``mikoshi`` argument parser with all of its subcommands.

    Each subcommand stores its handler under ``func`` so the caller can
    dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(prog="mikoshi")
    root.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose external library output",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # Repository commands.
    index_cmd = commands.add_parser("index", help="Index a repository")
    index_cmd.add_argument("path", help="Path to repository")
    index_cmd.set_defaults(func=cmd_index)

    search_cmd = commands.add_parser("search", help="Search an indexed repository")
    search_cmd.add_argument("path", help="Path to repository")
    search_cmd.add_argument("query", help="Search query")
    search_cmd.add_argument("--k", type=int, default=8, help="Number of results")
    search_cmd.set_defaults(func=cmd_search)

    doctor_cmd = commands.add_parser("doctor", help="Check Mikoshi setup")
    doctor_cmd.add_argument("path", nargs="?", help="Optional repo path")
    doctor_cmd.set_defaults(func=cmd_doctor)

    status_cmd = commands.add_parser("status", help="Show index status")
    status_cmd.add_argument("path", help="Path to repository")
    status_cmd.set_defaults(func=cmd_status)

    clear_cmd = commands.add_parser("clear", help="Clear index data")
    clear_cmd.add_argument("path", help="Path to repository")
    clear_cmd.set_defaults(func=cmd_clear)

    # Account commands take no arguments of their own.
    commands.add_parser("login", help="Sign in").set_defaults(func=cmd_login)
    commands.add_parser("logout", help="Clear local auth state").set_defaults(
        func=cmd_logout
    )
    commands.add_parser("whoami", help="Show current auth status").set_defaults(
        func=cmd_whoami
    )

    auth_cmd = commands.add_parser("auth", help="Auth configuration")
    auth_commands = auth_cmd.add_subparsers(dest="auth_command", required=True)
    auth_commands.add_parser("configure", help="Set auth config").set_defaults(
        func=cmd_auth_configure
    )

    return root
283
+
284
+
285
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv``, configure library verbosity and run the subcommand.

    ``MIKOSHI_QUIET_EXTERNAL_LIBS`` is set to ``"0"`` when ``--verbose`` is
    given and to ``"1"`` otherwise. Returns the handler's result as an int.
    """
    args = build_parser().parse_args(argv)
    quiet = not args.verbose
    os.environ["MIKOSHI_QUIET_EXTERNAL_LIBS"] = "1" if quiet else "0"
    configure_external_libs(quiet)
    exit_code = args.func(args)
    return int(exit_code)
292
+
293
+
294
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())