mikoshi 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. mikoshi-0.1.9/PKG-INFO +52 -0
  2. mikoshi-0.1.9/README.md +43 -0
  3. mikoshi-0.1.9/pyproject.toml +22 -0
  4. mikoshi-0.1.9/setup.cfg +17 -0
  5. mikoshi-0.1.9/src/mikoshi/__init__.py +3 -0
  6. mikoshi-0.1.9/src/mikoshi/auth.py +265 -0
  7. mikoshi-0.1.9/src/mikoshi/chunking.py +44 -0
  8. mikoshi-0.1.9/src/mikoshi/cli.py +295 -0
  9. mikoshi-0.1.9/src/mikoshi/config.py +125 -0
  10. mikoshi-0.1.9/src/mikoshi/entitlements.py +32 -0
  11. mikoshi-0.1.9/src/mikoshi/hashing.py +11 -0
  12. mikoshi-0.1.9/src/mikoshi/ignore.py +139 -0
  13. mikoshi-0.1.9/src/mikoshi/indexing/__init__.py +9 -0
  14. mikoshi-0.1.9/src/mikoshi/indexing/file_scanner.py +60 -0
  15. mikoshi-0.1.9/src/mikoshi/indexing/index_store.py +87 -0
  16. mikoshi-0.1.9/src/mikoshi/indexing/indexer.py +237 -0
  17. mikoshi-0.1.9/src/mikoshi/mcp_server/__init__.py +3 -0
  18. mikoshi-0.1.9/src/mikoshi/mcp_server/server.py +135 -0
  19. mikoshi-0.1.9/src/mikoshi/retrieval/__init__.py +17 -0
  20. mikoshi-0.1.9/src/mikoshi/retrieval/hybrid.py +109 -0
  21. mikoshi-0.1.9/src/mikoshi/retrieval/lexical.py +68 -0
  22. mikoshi-0.1.9/src/mikoshi/retrieval/rerank.py +27 -0
  23. mikoshi-0.1.9/src/mikoshi/retrieval/semantic.py +175 -0
  24. mikoshi-0.1.9/src/mikoshi/utils/__init__.py +11 -0
  25. mikoshi-0.1.9/src/mikoshi/utils/timer.py +18 -0
  26. mikoshi-0.1.9/src/mikoshi/utils/types.py +111 -0
  27. mikoshi-0.1.9/src/mikoshi.egg-info/PKG-INFO +52 -0
  28. mikoshi-0.1.9/src/mikoshi.egg-info/SOURCES.txt +34 -0
  29. mikoshi-0.1.9/src/mikoshi.egg-info/dependency_links.txt +1 -0
  30. mikoshi-0.1.9/src/mikoshi.egg-info/top_level.txt +1 -0
  31. mikoshi-0.1.9/tests/test_auth.py +137 -0
  32. mikoshi-0.1.9/tests/test_chunking.py +14 -0
  33. mikoshi-0.1.9/tests/test_config.py +13 -0
  34. mikoshi-0.1.9/tests/test_ignore.py +28 -0
  35. mikoshi-0.1.9/tests/test_index_roundtrip.py +43 -0
mikoshi-0.1.9/PKG-INFO ADDED
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: mikoshi
3
+ Version: 0.1.9
4
+ Summary: Private local code search + MCP
5
+ Author: NEET
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+
10
+ ```bash
11
+ curl -fsSL https://raw.githubusercontent.com/NEETlabs/Mikoshi/main/scripts/install.sh | bash
12
+ ```
13
+
14
+ ✨ Mikoshi — private local code search + MCP
15
+
16
+ ## Install (macOS)
17
+ ```bash
18
+ brew install pipx
19
+ pipx ensurepath
20
+ pipx install mikoshi
21
+
22
+ Use
23
+
24
+ mikoshi index ~/project
25
+ mikoshi search ~/project "query"
26
+
27
+ MCP
28
+
29
+ mikoshi-mcp
30
+
31
+ Codex MCP config (config.toml)
32
+
33
+ [mcp_servers.mikoshi]
34
+ command = "mikoshi-mcp"
35
+ args = []
36
+ enabled = true
37
+
38
+ [projects."~/project"]
39
+ trust_level = "trusted"
40
+
41
+ If running Mikoshi from source (dev only), use:
42
+ command = "bash"
43
+ args = ["-lc", "cd /path/to/Mikoshi && source .venv/bin/activate && mikoshi-mcp"]
44
+
45
+ Developer
46
+
47
+ make install
48
+ make test
49
+ mikoshi doctor
50
+
51
+ Note: First run may download the local embedding model once.
52
+ ```
@@ -0,0 +1,43 @@
1
+ ```bash
2
+ curl -fsSL https://raw.githubusercontent.com/NEETlabs/Mikoshi/main/scripts/install.sh | bash
3
+ ```
4
+
5
+ ✨ Mikoshi — private local code search + MCP
6
+
7
+ ## Install (macOS)
8
+ ```bash
9
+ brew install pipx
10
+ pipx ensurepath
11
+ pipx install mikoshi
12
+
13
+ Use
14
+
15
+ mikoshi index ~/project
16
+ mikoshi search ~/project "query"
17
+
18
+ MCP
19
+
20
+ mikoshi-mcp
21
+
22
+ Codex MCP config (config.toml)
23
+
24
+ [mcp_servers.mikoshi]
25
+ command = "mikoshi-mcp"
26
+ args = []
27
+ enabled = true
28
+
29
+ [projects."~/project"]
30
+ trust_level = "trusted"
31
+
32
+ If running Mikoshi from source (dev only), use:
33
+ command = "bash"
34
+ args = ["-lc", "cd /path/to/Mikoshi && source .venv/bin/activate && mikoshi-mcp"]
35
+
36
+ Developer
37
+
38
+ make install
39
+ make test
40
+ mikoshi doctor
41
+
42
+ Note: First run may download the local embedding model once.
43
+ ```
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "mikoshi"
7
+ version = "0.1.9"
8
+ description = "Private local code search + MCP"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "NEET" }
14
+ ]
15
+
16
+ dependencies = []
17
+
18
+ [tool.setuptools]
19
+ package-dir = { "" = "src" }
20
+
21
+ [tool.setuptools.packages.find]
22
+ where = ["src"]
@@ -0,0 +1,17 @@
1
+ [metadata]
2
+ name = mikoshi
3
+ version = 0.1.9
4
+ description = Private local code search + MCP
5
+
6
+ [options]
7
+ package_dir =
8
+ = src
9
+ packages = find:
10
+
11
+ [options.packages.find]
12
+ where = src
13
+
14
+ [egg_info]
15
+ tag_build =
16
+ tag_date = 0
17
+
@@ -0,0 +1,3 @@
1
+ __all__ = ["__version__"]
2
+
3
+ __version__ = "0.1.9"
@@ -0,0 +1,265 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import secrets
8
+ import webbrowser
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any
13
+ from urllib.parse import urlencode
14
+
15
+ import httpx
16
+
17
+ from mikoshi.entitlements import DEFAULT_FEATURES, DEFAULT_PLAN
18
+
19
+
20
+ AUTH_FILENAME = "auth.json"
21
+ CONFIG_FILENAME = "config.json"
22
+ DEFAULT_API_BASE_URL = "https://neet.gg"
23
+
24
+
25
class AuthError(RuntimeError):
    """Error raised by the authentication helpers when login cannot proceed."""
27
+
28
+
29
@dataclass(frozen=True)
class AuthState:
    """Immutable record of the credentials persisted after a login."""

    # Token string returned by the exchange endpoint.
    access_token: str
    # Expiry timestamp kept as the raw string received.
    expires_at: str
    # Plan name associated with the account.
    plan: str
    # Feature names enabled for the account.
    features: list[str]

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict snapshot; the feature list is copied."""
        snapshot: dict[str, Any] = {}
        snapshot["access_token"] = self.access_token
        snapshot["expires_at"] = self.expires_at
        snapshot["plan"] = self.plan
        snapshot["features"] = list(self.features)
        return snapshot

    @staticmethod
    def from_dict(data: dict[str, Any]) -> "AuthState":
        """Build an ``AuthState`` from a mapping, coercing every field to ``str``.

        Missing ``access_token``/``expires_at`` become empty strings, a missing
        ``plan`` falls back to ``DEFAULT_PLAN``, and a bare string under
        ``features`` is treated as a one-element list.
        """
        raw_features = data.get("features") or []
        if isinstance(raw_features, str):
            raw_features = [raw_features]
        token = str(data.get("access_token", ""))
        expiry = str(data.get("expires_at", ""))
        plan_name = str(data.get("plan", DEFAULT_PLAN))
        feature_names = [str(entry) for entry in raw_features]
        return AuthState(
            access_token=token,
            expires_at=expiry,
            plan=plan_name,
            features=feature_names,
        )
55
+
56
+
57
@dataclass(frozen=True)
class BrokerConfig:
    """Immutable holder for the broker connection settings."""

    # Base URL that the login and exchange endpoints are built from.
    api_base_url: str
60
+
61
+
62
def _index_root() -> Path:
    """Return the data directory: ``$MIKOSHI_INDEX_ROOT`` or ``~/.mikoshi``, with ``~`` expanded."""
    configured = os.environ.get("MIKOSHI_INDEX_ROOT", "~/.mikoshi")
    root = Path(configured)
    return root.expanduser()
64
+
65
+
66
def auth_path() -> Path:
    """Return the location of the auth file inside the index root directory."""
    root = _index_root()
    return root.joinpath(AUTH_FILENAME)
68
+
69
+
70
def config_path() -> Path:
    """Return the location of the config file inside the index root directory."""
    root = _index_root()
    return root.joinpath(CONFIG_FILENAME)
72
+
73
+
74
def load_broker_config() -> BrokerConfig:
    """Load the broker configuration, falling back to the default API URL.

    The default ``BrokerConfig`` is returned when the config file is missing,
    unreadable, not valid JSON, not a JSON object, or when ``api_base_url`` is
    absent, not a string, or blank.

    Returns:
        A ``BrokerConfig`` whose URL has surrounding whitespace and trailing
        slashes removed.
    """
    default = BrokerConfig(api_base_url=DEFAULT_API_BASE_URL)
    try:
        # A missing file raises FileNotFoundError (an OSError); malformed JSON
        # and undecodable bytes both raise ValueError subclasses.
        data = json.loads(config_path().read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return default
    if not isinstance(data, dict):
        return default

    raw_url = data.get("api_base_url")
    # Blindly str()-ing the value would turn a JSON null into the URL "None".
    if not isinstance(raw_url, str):
        return default
    api_base_url = raw_url.strip().rstrip("/")
    if not api_base_url:
        return default
    return BrokerConfig(api_base_url=api_base_url)
88
+
89
+
90
def save_broker_config(api_base_url: str) -> None:
    """Persist the broker API base URL to the config file.

    The URL is stripped of whitespace and trailing slashes; a blank result is
    replaced by ``DEFAULT_API_BASE_URL``. The JSON document is written to a
    sibling ``.json.tmp`` file opened with mode ``0o600`` and then moved over
    the real file, which is chmod-ed to ``0o600`` afterwards.
    """
    normalized = api_base_url.strip().rstrip("/")
    if not normalized:
        normalized = DEFAULT_API_BASE_URL

    target = config_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"api_base_url": normalized}, indent=2, sort_keys=True)

    scratch = target.with_suffix(".json.tmp")
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    descriptor = os.open(scratch, open_flags, 0o600)
    with os.fdopen(descriptor, "w", encoding="utf-8") as out:
        out.write(document + "\n")

    os.replace(scratch, target)
    os.chmod(target, 0o600)
102
+
103
+
104
def load_auth_state() -> AuthState | None:
    """Read the stored auth state, or return ``None`` when it is unavailable.

    ``None`` is returned when the auth file does not exist, cannot be read or
    parsed as JSON, or does not contain a JSON object.
    """
    location = auth_path()
    if location.exists():
        try:
            parsed = json.loads(location.read_text(encoding="utf-8"))
        except Exception:
            parsed = None
        if isinstance(parsed, dict):
            return AuthState.from_dict(parsed)
    return None
115
+
116
+
117
def save_auth_state(state: AuthState) -> None:
    """Write ``state`` to the auth file with owner-only permissions.

    The JSON document is written to a sibling ``.json.tmp`` file and then
    moved over the real file with ``os.replace``. If writing or replacing
    fails, the temporary file is removed and the original exception is
    re-raised.

    Args:
        state: The authentication state to persist (contains the access token).
    """
    path = auth_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state.to_dict(), indent=2, sort_keys=True)
    temp_path = path.with_suffix(".json.tmp")
    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            # The mode given to os.open only applies when the file is created;
            # a leftover temp file keeps its old mode, so tighten it before
            # the token is written.
            os.chmod(temp_path, 0o600)
            handle.write(payload)
            handle.write("\n")
        os.replace(temp_path, path)
    except BaseException:
        # Do not leave a partially written token file behind.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        raise
    os.chmod(path, 0o600)
128
+
129
+
130
def clear_auth_state() -> None:
    """Delete the stored auth file; do nothing if it is already gone."""
    # unlink(missing_ok=True) avoids the race between an exists() check and
    # the unlink when another process removes the file in between.
    auth_path().unlink(missing_ok=True)
134
+
135
+
136
def _parse_expires_at(value: str) -> datetime | None:
    """Parse an ISO-8601 expiry string into a timezone-aware ``datetime``.

    Args:
        value: Timestamp text; a trailing ``Z`` is accepted as UTC.

    Returns:
        The parsed datetime, or ``None`` when ``value`` is empty or cannot be
        parsed. A timestamp without an offset is treated as UTC, so the result
        can always be compared with ``datetime.now(timezone.utc)``.
    """
    if not value:
        return None
    try:
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11.
        normalized = value[:-1] + "+00:00" if value.endswith("Z") else value
        parsed = datetime.fromisoformat(normalized)
    except (AttributeError, TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        # Comparing a naive datetime with an aware one raises TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
145
+
146
+
147
def is_expired(state: AuthState) -> bool:
    """Report whether ``state`` has expired.

    A state whose ``expires_at`` cannot be parsed counts as expired.
    """
    deadline = _parse_expires_at(state.expires_at)
    if deadline:
        return deadline <= datetime.now(timezone.utc)
    return True
152
+
153
+
154
def email_from_token(access_token: str) -> str | None:
    """Extract an e-mail-like claim from the second dot-separated token segment.

    The segment is base64url-decoded (padding is restored first) and parsed as
    JSON. The first non-empty value among ``email``, ``user_email`` and
    ``preferred_username`` is returned as a string. Any failure yields ``None``.
    """
    claim_names = ("email", "user_email", "preferred_username")
    try:
        segments = access_token.split(".")
        if len(segments) < 2:
            return None
        body = segments[1]
        padding = "=" * (-len(body) % 4)
        raw = base64.urlsafe_b64decode((body + padding).encode("utf-8"))
        claims = json.loads(raw.decode("utf-8"))
        return next(
            (str(claims[name]) for name in claim_names if claims.get(name)),
            None,
        )
    except Exception:
        return None
169
+
170
+
171
def _generate_state() -> str:
    """Return a fresh random URL-safe ``state`` value built from 32 random bytes."""
    token = secrets.token_urlsafe(32)
    return token.rstrip("=")
173
+
174
+
175
def _generate_code_verifier() -> str:
    """Return a fresh random URL-safe code verifier built from 64 random bytes."""
    token = secrets.token_urlsafe(64)
    return token.rstrip("=")
177
+
178
+
179
def _code_challenge(code_verifier: str) -> str:
    """Return the unpadded base64url encoding of the SHA-256 digest of ``code_verifier``."""
    hashed = hashlib.sha256(code_verifier.encode("utf-8"))
    encoded = base64.urlsafe_b64encode(hashed.digest())
    return encoded.rstrip(b"=").decode("utf-8")
182
+
183
+
184
def _login_url(api_base_url: str, state: str, challenge: str) -> str:
    """Build the browser login URL carrying the S256 challenge and ``state``."""
    params = [
        ("response_type", "code"),
        ("client_id", "mikoshi"),
        ("code_challenge", challenge),
        ("code_challenge_method", "S256"),
        ("state", state),
    ]
    base = api_base_url.rstrip("/")
    return base + "/login?" + urlencode(params)
193
+
194
+
195
def _parse_paste_payload(text: str) -> tuple[str, str]:
    """Parse the pasted JSON and return its ``(code, state)`` pair.

    Both values are converted to strings and stripped of whitespace.

    Raises:
        AuthError: If ``text`` is not valid JSON, is not a JSON object, or
            either value is empty after stripping.
    """
    try:
        parsed = json.loads(text)
    except Exception as exc:
        raise AuthError("Invalid JSON response.") from exc
    if isinstance(parsed, dict):
        code = str(parsed.get("code", "")).strip()
        state = str(parsed.get("state", "")).strip()
        if code and state:
            return code, state
    raise AuthError("Invalid JSON response.")
207
+
208
+
209
def _ensure_state_match(expected: str, actual: str) -> None:
    """Raise ``AuthError`` unless the returned state equals the one that was sent."""
    if expected == actual:
        return
    raise AuthError("State mismatch. Please retry login.")
212
+
213
+
214
def _exchange_code(
    api_base_url: str, code: str, code_verifier: str, state: str
) -> dict[str, Any]:
    """POST the code, verifier and state to ``/cli/exchange`` and return the JSON object.

    Args:
        api_base_url: Base URL of the API; a trailing slash is ignored.
        code: Code taken from the pasted response.
        code_verifier: Verifier whose challenge was sent in the login URL.
        state: State value returned alongside the code.

    Returns:
        The decoded JSON object from the response body.

    Raises:
        AuthError: If the status code is 400 or above, the body is not valid
            JSON, or the decoded JSON is not an object.
    """
    payload = {"code": code, "code_verifier": code_verifier, "state": state}
    url = f"{api_base_url.rstrip('/')}/cli/exchange"
    with httpx.Client(timeout=10.0) as client:
        response = client.post(url, json=payload)
        if response.status_code >= 400:
            raise AuthError("Login failed. Please try again.")
        try:
            data = response.json()
        except ValueError as exc:
            # A non-JSON body (e.g. an HTML error page) is reported the same
            # way as any other malformed response, not as a raw decode error.
            raise AuthError("Login failed. Invalid response.") from exc
    if not isinstance(data, dict):
        raise AuthError("Login failed. Invalid response.")
    return data
227
+
228
+
229
def login() -> str:
    """Run the interactive browser login and persist the resulting auth state.

    Opens the login URL in a browser, reads the pasted JSON response from
    standard input, checks the returned state, exchanges the code, and saves
    the resulting ``AuthState``.

    Returns:
        The ``email`` value from the exchange response.

    Raises:
        AuthError: If the browser cannot be opened, the pasted payload or the
            state is invalid, or the exchange response lacks ``access_token``,
            ``expires_at`` or ``email``.
    """
    api_base_url = load_broker_config().api_base_url
    state = _generate_state()
    code_verifier = _generate_code_verifier()
    login_url = _login_url(api_base_url, state, _code_challenge(code_verifier))

    print("🔐 Starting authentication...")
    print("🌐 Opening authentication page in your browser...")
    if not webbrowser.open(login_url, new=1, autoraise=True):
        raise AuthError("Unable to open authentication page.")

    pasted = input("Paste the JSON response here: ").strip()
    code, returned_state = _parse_paste_payload(pasted)
    _ensure_state_match(state, returned_state)

    data = _exchange_code(api_base_url, code, code_verifier, returned_state)

    def _text_field(name: str) -> str:
        # Missing keys are treated as empty strings.
        return str(data.get(name, "")).strip()

    access_token = _text_field("access_token")
    expires_at = _text_field("expires_at")
    email = _text_field("email")
    if not (access_token and expires_at and email):
        raise AuthError("Login failed. Invalid response.")

    plan = str(data.get("plan") or DEFAULT_PLAN)
    features = data.get("features") or list(DEFAULT_FEATURES)
    if isinstance(features, str):
        features = [features]

    save_auth_state(
        AuthState(
            access_token=access_token,
            expires_at=expires_at,
            plan=plan,
            features=[str(entry) for entry in features],
        )
    )
    return email
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class ChunkSpan:
    """A contiguous run of lines taken from a text."""

    # 1-based number of the first line in the chunk.
    start_line: int
    # 1-based number of the last line in the chunk (inclusive).
    end_line: int
    # The chunk's lines joined with "\n".
    text: str


def chunk_text(text: str, max_lines: int, overlap: int) -> list[ChunkSpan]:
    """Split ``text`` into line-based chunks that overlap by ``overlap`` lines.

    Args:
        text: The text to split; lines are obtained with ``str.splitlines``.
        max_lines: Maximum number of lines per chunk (must be positive).
        overlap: Number of lines shared by consecutive chunks (must be
            non-negative and smaller than ``max_lines``).

    Returns:
        The chunks in order; an empty list when ``text`` has no lines.

    Raises:
        ValueError: If ``max_lines`` or ``overlap`` is out of range.
    """
    if max_lines <= 0:
        raise ValueError("max_lines must be > 0")
    if overlap < 0:
        raise ValueError("overlap must be >= 0")
    if overlap >= max_lines:
        raise ValueError("overlap must be smaller than max_lines")

    lines = text.splitlines()
    total = len(lines)
    stride = max_lines - overlap
    spans: list[ChunkSpan] = []

    for begin in range(0, total, stride):
        stop = min(begin + max_lines, total)
        spans.append(
            ChunkSpan(
                start_line=begin + 1,
                end_line=stop,
                text="\n".join(lines[begin:stop]),
            )
        )
        # Once a chunk reaches the last line, further windows would only
        # repeat its tail.
        if stop == total:
            break

    return spans