mikoshi 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mikoshi-0.1.9/PKG-INFO +52 -0
- mikoshi-0.1.9/README.md +43 -0
- mikoshi-0.1.9/pyproject.toml +22 -0
- mikoshi-0.1.9/setup.cfg +17 -0
- mikoshi-0.1.9/src/mikoshi/__init__.py +3 -0
- mikoshi-0.1.9/src/mikoshi/auth.py +265 -0
- mikoshi-0.1.9/src/mikoshi/chunking.py +44 -0
- mikoshi-0.1.9/src/mikoshi/cli.py +295 -0
- mikoshi-0.1.9/src/mikoshi/config.py +125 -0
- mikoshi-0.1.9/src/mikoshi/entitlements.py +32 -0
- mikoshi-0.1.9/src/mikoshi/hashing.py +11 -0
- mikoshi-0.1.9/src/mikoshi/ignore.py +139 -0
- mikoshi-0.1.9/src/mikoshi/indexing/__init__.py +9 -0
- mikoshi-0.1.9/src/mikoshi/indexing/file_scanner.py +60 -0
- mikoshi-0.1.9/src/mikoshi/indexing/index_store.py +87 -0
- mikoshi-0.1.9/src/mikoshi/indexing/indexer.py +237 -0
- mikoshi-0.1.9/src/mikoshi/mcp_server/__init__.py +3 -0
- mikoshi-0.1.9/src/mikoshi/mcp_server/server.py +135 -0
- mikoshi-0.1.9/src/mikoshi/retrieval/__init__.py +17 -0
- mikoshi-0.1.9/src/mikoshi/retrieval/hybrid.py +109 -0
- mikoshi-0.1.9/src/mikoshi/retrieval/lexical.py +68 -0
- mikoshi-0.1.9/src/mikoshi/retrieval/rerank.py +27 -0
- mikoshi-0.1.9/src/mikoshi/retrieval/semantic.py +175 -0
- mikoshi-0.1.9/src/mikoshi/utils/__init__.py +11 -0
- mikoshi-0.1.9/src/mikoshi/utils/timer.py +18 -0
- mikoshi-0.1.9/src/mikoshi/utils/types.py +111 -0
- mikoshi-0.1.9/src/mikoshi.egg-info/PKG-INFO +52 -0
- mikoshi-0.1.9/src/mikoshi.egg-info/SOURCES.txt +34 -0
- mikoshi-0.1.9/src/mikoshi.egg-info/dependency_links.txt +1 -0
- mikoshi-0.1.9/src/mikoshi.egg-info/top_level.txt +1 -0
- mikoshi-0.1.9/tests/test_auth.py +137 -0
- mikoshi-0.1.9/tests/test_chunking.py +14 -0
- mikoshi-0.1.9/tests/test_config.py +13 -0
- mikoshi-0.1.9/tests/test_ignore.py +28 -0
- mikoshi-0.1.9/tests/test_index_roundtrip.py +43 -0
mikoshi-0.1.9/PKG-INFO
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mikoshi
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: Private local code search + MCP
|
|
5
|
+
Author: NEET
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
curl -fsSL https://raw.githubusercontent.com/NEETlabs/Mikoshi/main/scripts/install.sh | bash
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
✨ Mikoshi — private local code search + MCP
|
|
15
|
+
|
|
16
|
+
## Install (macOS)
|
|
17
|
+
```bash
|
|
18
|
+
brew install pipx
|
|
19
|
+
pipx ensurepath
|
|
20
|
+
pipx install mikoshi
|
|
21
|
+
|
|
22
|
+
Use
|
|
23
|
+
|
|
24
|
+
mikoshi index ~/project
|
|
25
|
+
mikoshi search ~/project "query"
|
|
26
|
+
|
|
27
|
+
MCP
|
|
28
|
+
|
|
29
|
+
mikoshi-mcp
|
|
30
|
+
|
|
31
|
+
Codex MCP config (config.toml)
|
|
32
|
+
|
|
33
|
+
[mcp_servers.mikoshi]
|
|
34
|
+
command = "mikoshi-mcp"
|
|
35
|
+
args = []
|
|
36
|
+
enabled = true
|
|
37
|
+
|
|
38
|
+
[projects."~/project"]
|
|
39
|
+
trust_level = "trusted"
|
|
40
|
+
|
|
41
|
+
If running Mikoshi from source (dev only), use:
|
|
42
|
+
command = "bash"
|
|
43
|
+
args = ["-lc", "cd /path/to/Mikoshi && source .venv/bin/activate && mikoshi-mcp"]
|
|
44
|
+
|
|
45
|
+
Developer
|
|
46
|
+
|
|
47
|
+
make install
|
|
48
|
+
make test
|
|
49
|
+
mikoshi doctor
|
|
50
|
+
|
|
51
|
+
Note: First run may download the local embedding model once.
|
|
52
|
+
```
|
mikoshi-0.1.9/README.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
```bash
|
|
2
|
+
curl -fsSL https://raw.githubusercontent.com/NEETlabs/Mikoshi/main/scripts/install.sh | bash
|
|
3
|
+
```
|
|
4
|
+
|
|
5
|
+
✨ Mikoshi — private local code search + MCP
|
|
6
|
+
|
|
7
|
+
## Install (macOS)
|
|
8
|
+
```bash
|
|
9
|
+
brew install pipx
|
|
10
|
+
pipx ensurepath
|
|
11
|
+
pipx install mikoshi
|
|
12
|
+
|
|
13
|
+
Use
|
|
14
|
+
|
|
15
|
+
mikoshi index ~/project
|
|
16
|
+
mikoshi search ~/project "query"
|
|
17
|
+
|
|
18
|
+
MCP
|
|
19
|
+
|
|
20
|
+
mikoshi-mcp
|
|
21
|
+
|
|
22
|
+
Codex MCP config (config.toml)
|
|
23
|
+
|
|
24
|
+
[mcp_servers.mikoshi]
|
|
25
|
+
command = "mikoshi-mcp"
|
|
26
|
+
args = []
|
|
27
|
+
enabled = true
|
|
28
|
+
|
|
29
|
+
[projects."~/project"]
|
|
30
|
+
trust_level = "trusted"
|
|
31
|
+
|
|
32
|
+
If running Mikoshi from source (dev only), use:
|
|
33
|
+
command = "bash"
|
|
34
|
+
args = ["-lc", "cd /path/to/Mikoshi && source .venv/bin/activate && mikoshi-mcp"]
|
|
35
|
+
|
|
36
|
+
Developer
|
|
37
|
+
|
|
38
|
+
make install
|
|
39
|
+
make test
|
|
40
|
+
mikoshi doctor
|
|
41
|
+
|
|
42
|
+
Note: First run may download the local embedding model once.
|
|
43
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mikoshi"
|
|
7
|
+
version = "0.1.9"
|
|
8
|
+
description = "Private local code search + MCP"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "NEET" }
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
dependencies = []
|
|
17
|
+
|
|
18
|
+
[tool.setuptools]
|
|
19
|
+
package-dir = { "" = "src" }
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
where = ["src"]
|
mikoshi-0.1.9/setup.cfg
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import secrets
|
|
8
|
+
import webbrowser
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
from urllib.parse import urlencode
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
from mikoshi.entitlements import DEFAULT_FEATURES, DEFAULT_PLAN
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Name of the file, inside the Mikoshi root directory, holding the saved login.
AUTH_FILENAME = "auth.json"
# Name of the file, inside the Mikoshi root directory, holding broker settings.
CONFIG_FILENAME = "config.json"
# API base URL used whenever no usable value is found in the config file.
DEFAULT_API_BASE_URL = "https://neet.gg"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AuthError(RuntimeError):
    """Raised when a step of the login flow fails or yields unusable data."""

    pass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class AuthState:
    """Immutable record of a saved login: token, expiry, plan and features."""

    access_token: str
    expires_at: str  # timestamp kept as a string; parsed when expiry is checked
    plan: str
    features: list[str]

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serialisable mapping of this state."""
        return {
            "access_token": self.access_token,
            "expires_at": self.expires_at,
            "plan": self.plan,
            # Copy so callers cannot mutate the frozen instance's list.
            "features": list(self.features),
        }

    @staticmethod
    def from_dict(data: dict[str, Any]) -> "AuthState":
        """Build an ``AuthState`` from a decoded JSON object.

        Missing *and* ``null``/empty values fall back to their defaults
        (empty string, ``DEFAULT_PLAN``, empty feature list). A single string
        in ``features`` is treated as a one-element list.
        """
        features = data.get("features") or []
        if isinstance(features, str):
            features = [features]
        return AuthState(
            # ``or`` (rather than a .get default) keeps a JSON null from being
            # stringified into the literal text "None".
            access_token=str(data.get("access_token") or ""),
            expires_at=str(data.get("expires_at") or ""),
            plan=str(data.get("plan") or DEFAULT_PLAN),
            features=[str(item) for item in features],
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class BrokerConfig:
    """Immutable broker settings read from the config file."""

    api_base_url: str  # base URL that the login and exchange URLs are built from
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _index_root() -> Path:
    """Return the Mikoshi root directory.

    Uses the ``MIKOSHI_INDEX_ROOT`` environment variable when set, otherwise
    ``~/.mikoshi``; a leading ``~`` is expanded either way.
    """
    configured = os.environ.get("MIKOSHI_INDEX_ROOT", "~/.mikoshi")
    root = Path(configured)
    return root.expanduser()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def auth_path() -> Path:
    """Return the path of the saved auth-state file under the Mikoshi root."""
    root = _index_root()
    return root.joinpath(AUTH_FILENAME)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def config_path() -> Path:
    """Return the path of the broker config file under the Mikoshi root."""
    root = _index_root()
    return root.joinpath(CONFIG_FILENAME)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def load_broker_config() -> BrokerConfig:
    """Load broker settings from the config file, falling back to defaults.

    A config pointing at ``DEFAULT_API_BASE_URL`` is returned whenever the
    file is missing, unreadable, not valid JSON, not a JSON object, or does
    not contain a non-empty string ``api_base_url``. Surrounding whitespace
    and trailing slashes are stripped from a configured URL.
    """
    fallback = BrokerConfig(api_base_url=DEFAULT_API_BASE_URL)
    try:
        # Read directly instead of checking exists() first: avoids a race and
        # treats a missing file like any other unreadable file.
        data = json.loads(config_path().read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: bad JSON or encoding.
        return fallback
    if not isinstance(data, dict):
        return fallback
    raw_url = data.get("api_base_url")
    # A JSON null (or other non-string) must not be stringified into a bogus
    # base URL such as "None".
    if not isinstance(raw_url, str):
        return fallback
    api_base_url = raw_url.strip().rstrip("/")
    if not api_base_url:
        return fallback
    return BrokerConfig(api_base_url=api_base_url)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def save_broker_config(api_base_url: str) -> None:
    """Write ``api_base_url`` to the config file atomically.

    The URL is stripped of surrounding whitespace and trailing slashes; a
    blank result is replaced by ``DEFAULT_API_BASE_URL``. The JSON is written
    to a sibling temp file and then moved over the real file, which ends up
    with mode ``0o600``.
    """
    clean_url = api_base_url.strip().rstrip("/") or DEFAULT_API_BASE_URL
    path = config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps({"api_base_url": clean_url}, indent=2, sort_keys=True)
    temp_path = path.with_suffix(".json.tmp")
    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
            handle.write("\n")
        os.replace(temp_path, path)
    except BaseException:
        # Best-effort cleanup so a failed write does not leave a stray temp
        # file behind; the original error is what the caller should see.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
    os.chmod(path, 0o600)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def load_auth_state() -> AuthState | None:
    """Return the saved auth state, or ``None`` when none can be loaded.

    ``None`` is returned if the auth file does not exist, cannot be read or
    parsed as JSON, or does not contain a JSON object.
    """
    state_file = auth_path()
    if state_file.exists():
        try:
            raw = state_file.read_text(encoding="utf-8")
            parsed = json.loads(raw)
        except Exception:
            return None
        if isinstance(parsed, dict):
            return AuthState.from_dict(parsed)
    return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def save_auth_state(state: AuthState) -> None:
    """Write ``state`` to the auth file atomically with owner-only access.

    The JSON is written to a sibling temp file created with mode ``0o600``
    and then moved over the real file, which is also set to ``0o600``.
    """
    path = auth_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state.to_dict(), indent=2, sort_keys=True)
    temp_path = path.with_suffix(".json.tmp")
    # os.open() only applies the mode when it creates the file. Remove any
    # stale temp file first so the token is never written into a leftover
    # file with looser permissions.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        pass
    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
            handle.write("\n")
        os.replace(temp_path, path)
    except BaseException:
        # Best-effort cleanup: do not leave a (possibly partial) copy of the
        # token behind; the original error is what the caller should see.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
    os.chmod(path, 0o600)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def clear_auth_state() -> None:
    """Delete the saved auth file; do nothing if it does not exist."""
    # missing_ok avoids the exists()/unlink() race where the file disappears
    # between the check and the delete.
    auth_path().unlink(missing_ok=True)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _parse_expires_at(value: str) -> datetime | None:
    """Parse an ISO-8601 expiry string into a timezone-aware ``datetime``.

    A trailing ``Z`` is accepted as UTC. A timestamp without any offset is
    interpreted as UTC, so the result can always be compared with an aware
    "now". Returns ``None`` for empty or unparseable input.
    """
    if not value:
        return None
    try:
        if value.endswith("Z"):
            # fromisoformat() on older Pythons rejects the "Z" suffix.
            value = value[:-1] + "+00:00"
        parsed = datetime.fromisoformat(value)
    except (AttributeError, TypeError, ValueError):
        # Not a string, or not an ISO-8601 timestamp.
        return None
    if parsed.tzinfo is None:
        # Comparing a naive datetime with an aware one raises TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def is_expired(state: AuthState) -> bool:
    """Return ``True`` when ``state`` has reached or passed its expiry time.

    A missing or unparseable ``expires_at`` is treated as already expired.
    """
    deadline = _parse_expires_at(state.expires_at)
    if deadline:
        current = datetime.now(timezone.utc)
        return current >= deadline
    return True
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def email_from_token(access_token: str) -> str | None:
    """Extract an e-mail-like claim from a dot-separated token.

    The second segment is base64url-decoded (padding restored) and parsed as
    JSON; the first non-empty value among ``email``, ``user_email`` and
    ``preferred_username`` is returned as a string. Any failure, or no
    matching claim, yields ``None``. The token is not verified in any way.
    """
    try:
        segments = access_token.split(".")
        if len(segments) < 2:
            return None
        body = segments[1]
        # base64 needs a length that is a multiple of four.
        padded = body + "=" * (-len(body) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("utf-8"))
        claims = json.loads(raw.decode("utf-8"))
        candidates = (
            claims.get(name)
            for name in ("email", "user_email", "preferred_username")
        )
        for candidate in candidates:
            if candidate:
                return str(candidate)
        return None
    except Exception:
        return None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _generate_state() -> str:
    """Return a fresh random URL-safe string for the login ``state`` value."""
    token = secrets.token_urlsafe(32)
    return token.rstrip("=")
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _generate_code_verifier() -> str:
    """Return a fresh random URL-safe string used as the PKCE code verifier."""
    token = secrets.token_urlsafe(64)
    return token.rstrip("=")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _code_challenge(code_verifier: str) -> str:
    """Return the unpadded base64url SHA-256 digest of ``code_verifier``."""
    hasher = hashlib.sha256()
    hasher.update(code_verifier.encode("utf-8"))
    encoded = base64.urlsafe_b64encode(hasher.digest())
    return encoded.decode("utf-8").rstrip("=")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _login_url(api_base_url: str, state: str, challenge: str) -> str:
    """Build the browser login URL carrying the state and S256 challenge."""
    params = [
        ("response_type", "code"),
        ("client_id", "mikoshi"),
        ("code_challenge", challenge),
        ("code_challenge_method", "S256"),
        ("state", state),
    ]
    base = api_base_url.rstrip("/")
    return base + "/login?" + urlencode(params)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _parse_paste_payload(text: str) -> tuple[str, str]:
    """Extract ``(code, state)`` from the JSON text pasted by the user.

    Raises:
        AuthError: if the text is not JSON, is not a JSON object, or lacks a
            non-blank ``code`` or ``state``.
    """
    try:
        parsed = json.loads(text)
    except Exception as exc:
        raise AuthError("Invalid JSON response.") from exc
    if isinstance(parsed, dict):
        code, state = (
            str(parsed.get(field, "")).strip() for field in ("code", "state")
        )
        if code and state:
            return code, state
    raise AuthError("Invalid JSON response.")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _ensure_state_match(expected: str, actual: str) -> None:
    """Raise ``AuthError`` unless the returned state equals the one we sent."""
    if expected == actual:
        return
    raise AuthError("State mismatch. Please retry login.")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _exchange_code(
    api_base_url: str, code: str, code_verifier: str, state: str
) -> dict[str, Any]:
    """Exchange the pasted authorization code for token data.

    POSTs ``code``, ``code_verifier`` and ``state`` as JSON to
    ``<api_base_url>/cli/exchange`` with a 10 second timeout and returns the
    decoded JSON object.

    Raises:
        AuthError: on an HTTP status of 400 or above, or when the body is not
            a JSON object.
    """
    payload = {"code": code, "code_verifier": code_verifier, "state": state}
    url = f"{api_base_url.rstrip('/')}/cli/exchange"
    with httpx.Client(timeout=10.0) as client:
        response = client.post(url, json=payload)
    if response.status_code >= 400:
        raise AuthError("Login failed. Please try again.")
    try:
        data = response.json()
    except ValueError as exc:
        # A non-JSON success body (e.g. an HTML page from a proxy) should
        # surface as a login failure, not as a raw decode error.
        raise AuthError("Login failed. Invalid response.") from exc
    if not isinstance(data, dict):
        raise AuthError("Login failed. Invalid response.")
    return data
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def login() -> str:
    """Run the interactive browser login and persist the resulting state.

    Opens the login page in a browser, asks the user to paste the JSON
    response, checks the returned state, exchanges the code for a token and
    saves the auth state.

    Returns:
        The e-mail address reported by the exchange response.

    Raises:
        AuthError: if the browser cannot be opened, the pasted payload is
            invalid, the state does not match, or the exchange response lacks
            a token, expiry or e-mail.
    """
    api_base_url = load_broker_config().api_base_url
    state = _generate_state()
    code_verifier = _generate_code_verifier()
    login_url = _login_url(api_base_url, state, _code_challenge(code_verifier))

    print("🔐 Starting authentication...")
    print("🌐 Opening authentication page in your browser...")
    if not webbrowser.open(login_url, new=1, autoraise=True):
        raise AuthError("Unable to open authentication page.")

    pasted = input("Paste the JSON response here: ").strip()
    code, returned_state = _parse_paste_payload(pasted)
    _ensure_state_match(state, returned_state)

    reply = _exchange_code(api_base_url, code, code_verifier, returned_state)
    access_token, expires_at, email = (
        str(reply.get(field, "")).strip()
        for field in ("access_token", "expires_at", "email")
    )
    if not (access_token and expires_at and email):
        raise AuthError("Login failed. Invalid response.")

    plan = str(reply.get("plan") or DEFAULT_PLAN)
    features = reply.get("features") or list(DEFAULT_FEATURES)
    if isinstance(features, str):
        # A single feature name is accepted in place of a list.
        features = [features]

    save_auth_state(
        AuthState(
            access_token=access_token,
            expires_at=expires_at,
            plan=plan,
            features=[str(item) for item in features],
        )
    )
    return email
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(frozen=True)
class ChunkSpan:
    """An immutable slice of a text together with the line range it covers."""

    start_line: int  # chunk_text sets this to the 1-based first line number
    end_line: int  # chunk_text sets this to the 1-based last line (inclusive)
    text: str  # chunk_text sets this to the chunk's lines joined with "\n"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def chunk_text(text: str, max_lines: int, overlap: int) -> list[ChunkSpan]:
    """Split ``text`` into overlapping line-based chunks.

    Each chunk holds at most ``max_lines`` lines and starts
    ``max_lines - overlap`` lines after the previous one. Chunking stops with
    the first chunk that reaches the last line. Empty text yields ``[]``.

    Raises:
        ValueError: if ``max_lines`` is not positive, ``overlap`` is negative,
            or ``overlap`` is not smaller than ``max_lines``.
    """
    if max_lines <= 0:
        raise ValueError("max_lines must be > 0")
    if overlap < 0:
        raise ValueError("overlap must be >= 0")
    if overlap >= max_lines:
        raise ValueError("overlap must be smaller than max_lines")

    all_lines = text.splitlines()
    total = len(all_lines)
    stride = max_lines - overlap
    spans: list[ChunkSpan] = []

    for first in range(0, total, stride):
        last = min(first + max_lines, total)
        body = "\n".join(all_lines[first:last])
        spans.append(ChunkSpan(start_line=first + 1, end_line=last, text=body))
        if last == total:
            # The tail is covered; a further window would only repeat it.
            break

    return spans
|