searchpin 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Searchpin Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt
4
+ recursive-include searchpin *.py
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: searchpin
3
+ Version: 1.0.0
4
+ Summary: Self-hosted web search for AI agents — zero API keys, embedding re-rank, multi-engine parallel search
5
+ Author: Searchpin Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/telly6/searchpin
8
+ Project-URL: Repository, https://github.com/telly6/searchpin
9
+ Project-URL: Issues, https://github.com/telly6/searchpin/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastembed>=0.3.0
22
+ Requires-Dist: numpy
23
+ Requires-Dist: brotli
24
+ Dynamic: license-file
25
+
26
+ # Searchpin
27
+
28
+ [![PyPI version](https://img.shields.io/pypi/v/searchpin)](https://pypi.org/project/searchpin/)
29
+ [![Python](https://img.shields.io/pypi/pyversions/searchpin)](https://pypi.org/project/searchpin/)
30
+ [![License](https://img.shields.io/pypi/l/searchpin)](LICENSE)
31
+ [![Docker](https://img.shields.io/badge/docker-ghcr.io-blue)](https://github.com/telly6/searchpin/pkgs/container/searchpin)
32
+
33
+ Self-hosted web search for AI agents — zero API keys, zero cost. `pip install searchpin` and you're done.
34
+
35
+ - **Search quality rivals commercial products** — four search engines in parallel (Baidu, Sogou, Bing CN, Bing Intl), merged and re-ranked by semantic similarity. A rare capability among free MCP search tools.
36
+ - **Zero-cost agent development** — no API keys, no sign-ups, no usage limits. Pair with a local LLM and your entire development loop costs nothing. Run 24/7 agent experiments without worrying about quotas.
37
+ - **Pollution detection** — automatically flags results that are unrelated to your query, so your agent doesn't chase irrelevant content.
38
+ - **Cross-verification** — results from four independent sources let your LLM corroborate information across engines, raising the credibility of what it finds.
39
+ - **Content extraction that handles modern sites** — goes beyond basic HTML-to-text to extract SSR hydration payloads (Next.js, Nuxt), JSON-LD structured data, and microdata from pages that would otherwise return empty.
40
+ - **Token-conscious output** — results are titles, URLs, and snippets only. Your LLM decides which pages are worth fetching in full. Structured extraction data is compact and truncated, keeping token overhead under control.
41
+
42
+ ## Quick Start
43
+
44
+ ```bash
45
+ pip install searchpin
46
+ searchpin-server
47
+ ```
48
+
49
+ ## Configuration
50
+
51
+ ### Claude Desktop / Cursor / any MCP client
52
+
53
+ Add to your `mcpServers` config:
54
+
55
+ ```json
56
+ {
57
+ "mcpServers": {
58
+ "Searchpin": {
59
+ "command": "searchpin-server",
60
+ "args": []
61
+ }
62
+ }
63
+ }
64
+ ```
65
+
66
+ ### VS Code
67
+
68
+ [![Install in VS Code](https://img.shields.io/badge/VS_Code-Install-0098FF?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=Searchpin&config=%7B%22command%22%3A%22searchpin-server%22%2C%22args%22%3A%5B%5D%7D)
69
+ [![Install in VS Code Insiders](https://img.shields.io/badge/VS_Code_Insiders-Install-24bfa5?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=Searchpin&config=%7B%22command%22%3A%22searchpin-server%22%2C%22args%22%3A%5B%5D%7D&quality=insiders)
70
+
71
+ Or manually, add to `.vscode/mcp.json`:
72
+
73
+ ```json
74
+ {
75
+ "servers": {
76
+ "Searchpin": {
77
+ "command": "searchpin-server",
78
+ "args": []
79
+ }
80
+ }
81
+ }
82
+ ```
83
+
84
+ ### Docker
85
+
86
+ ```bash
87
+ docker run -i --rm ghcr.io/telly6/searchpin:latest
88
+ ```
89
+
90
+ ```json
91
+ {
92
+ "mcpServers": {
93
+ "Searchpin": {
94
+ "command": "docker",
95
+ "args": ["run", "-i", "--rm", "ghcr.io/telly6/searchpin:latest"]
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ ### Python API
102
+
103
+ ```python
104
+ from searchpin import SearchEngine
105
+
106
+ engine = SearchEngine()
107
+ results = engine.search("Python 3.13 new features")
108
+ page = engine.fetch("https://docs.python.org/3/whatsnew/3.13.html")
109
+ engine.close()
110
+ ```
111
+
112
+
113
+
@@ -0,0 +1,88 @@
1
+ # Searchpin
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/searchpin)](https://pypi.org/project/searchpin/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/searchpin)](https://pypi.org/project/searchpin/)
5
+ [![License](https://img.shields.io/pypi/l/searchpin)](LICENSE)
6
+ [![Docker](https://img.shields.io/badge/docker-ghcr.io-blue)](https://github.com/telly6/searchpin/pkgs/container/searchpin)
7
+
8
+ Self-hosted web search for AI agents — zero API keys, zero cost. `pip install searchpin` and you're done.
9
+
10
+ - **Search quality rivals commercial products** — four search engines in parallel (Baidu, Sogou, Bing CN, Bing Intl), merged and re-ranked by semantic similarity. A rare capability among free MCP search tools.
11
+ - **Zero-cost agent development** — no API keys, no sign-ups, no usage limits. Pair with a local LLM and your entire development loop costs nothing. Run 24/7 agent experiments without worrying about quotas.
12
+ - **Pollution detection** — automatically flags results that are unrelated to your query, so your agent doesn't chase irrelevant content.
13
+ - **Cross-verification** — results from four independent sources let your LLM corroborate information across engines, raising the credibility of what it finds.
14
+ - **Content extraction that handles modern sites** — goes beyond basic HTML-to-text to extract SSR hydration payloads (Next.js, Nuxt), JSON-LD structured data, and microdata from pages that would otherwise return empty.
15
+ - **Token-conscious output** — results are titles, URLs, and snippets only. Your LLM decides which pages are worth fetching in full. Structured extraction data is compact and truncated, keeping token overhead under control.
16
+
17
+ ## Quick Start
18
+
19
+ ```bash
20
+ pip install searchpin
21
+ searchpin-server
22
+ ```
23
+
24
+ ## Configuration
25
+
26
+ ### Claude Desktop / Cursor / any MCP client
27
+
28
+ Add to your `mcpServers` config:
29
+
30
+ ```json
31
+ {
32
+ "mcpServers": {
33
+ "Searchpin": {
34
+ "command": "searchpin-server",
35
+ "args": []
36
+ }
37
+ }
38
+ }
39
+ ```
40
+
41
+ ### VS Code
42
+
43
+ [![Install in VS Code](https://img.shields.io/badge/VS_Code-Install-0098FF?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=Searchpin&config=%7B%22command%22%3A%22searchpin-server%22%2C%22args%22%3A%5B%5D%7D)
44
+ [![Install in VS Code Insiders](https://img.shields.io/badge/VS_Code_Insiders-Install-24bfa5?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=Searchpin&config=%7B%22command%22%3A%22searchpin-server%22%2C%22args%22%3A%5B%5D%7D&quality=insiders)
45
+
46
+ Or manually, add to `.vscode/mcp.json`:
47
+
48
+ ```json
49
+ {
50
+ "servers": {
51
+ "Searchpin": {
52
+ "command": "searchpin-server",
53
+ "args": []
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ ### Docker
60
+
61
+ ```bash
62
+ docker run -i --rm ghcr.io/telly6/searchpin:latest
63
+ ```
64
+
65
+ ```json
66
+ {
67
+ "mcpServers": {
68
+ "Searchpin": {
69
+ "command": "docker",
70
+ "args": ["run", "-i", "--rm", "ghcr.io/telly6/searchpin:latest"]
71
+ }
72
+ }
73
+ }
74
+ ```
75
+
76
+ ### Python API
77
+
78
+ ```python
79
+ from searchpin import SearchEngine
80
+
81
+ engine = SearchEngine()
82
+ results = engine.search("Python 3.13 new features")
83
+ page = engine.fetch("https://docs.python.org/3/whatsnew/3.13.html")
84
+ engine.close()
85
+ ```
86
+
87
+
88
+
@@ -0,0 +1,68 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "searchpin"
7
+ version = "1.0.0"
8
+ description = "Self-hosted web search for AI agents — zero API keys, embedding re-rank, multi-engine parallel search"
9
+ authors = [
10
+ {name = "Searchpin Contributors"}
11
+ ]
12
+ license = "MIT"
13
+ readme = "README.md"
14
+ requires-python = ">=3.10"
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+ dependencies = [
26
+ "fastembed>=0.3.0",
27
+ "numpy",
28
+ "brotli",
29
+ ]
30
+
31
+ [project.scripts]
32
+ searchpin-server = "search_server:main"
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/telly6/searchpin"
36
+ Repository = "https://github.com/telly6/searchpin"
37
+ Issues = "https://github.com/telly6/searchpin/issues"
38
+
39
+ [tool.setuptools.packages.find]
40
+ include = ["searchpin", "searchpin.*"]
41
+
42
+ [tool.setuptools]
43
+ py-modules = ["search_server"]
44
+
45
+ # ── pytest ──────────────────────────────────────────────────
46
+ [tool.pytest.ini_options]
47
+ testpaths = ["tests"]
48
+ python_files = ["test_*.py"]
49
+
50
+ # ── ruff (lint + format) ────────────────────────────────────
51
+ [tool.ruff]
52
+ target-version = "py310"
53
+ line-length = 120
54
+ exclude = ["dev/", "proxy.py", ".venv/", "__pycache__/"]
55
+
56
+ [tool.ruff.lint]
57
+ select = [
58
+ "E", # pycodestyle errors
59
+ "F", # pyflakes
60
+ "W", # pycodestyle warnings
61
+ "I", # isort
62
+ "UP", # pyupgrade
63
+ ]
64
+ ignore = ["E501"] # line length — logged f-strings are intentional
65
+
66
+ [tool.ruff.format]
67
+ quote-style = "double"
68
+ indent-style = "space"
@@ -0,0 +1,3 @@
1
+ fastembed>=0.3.0
2
+ numpy
3
+ brotli
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Searchpin MCP Server (stdio transport)
4
+ An AI agent launches this as a subprocess; it reads newline-delimited JSON-RPC messages from stdin and writes one JSON response per line to stdout.
5
+ """
6
+
7
+ import json
8
+ import sys
9
+
10
+ from searchpin.config import DEFAULT_MODEL_NAME, PRODUCT_NAME
11
+ from searchpin.engine import MCP_TOOLS, SearchEngine
12
+
13
+
14
def build_response(rid, result):
    """Wrap *result* in a JSON-RPC 2.0 success envelope addressed to request *rid*."""
    envelope = {"jsonrpc": "2.0", "id": rid}
    envelope["result"] = result
    return envelope
16
+
17
+
18
def build_error(rid, code, message):
    """Build a JSON-RPC 2.0 error envelope for request *rid* with the given code and message."""
    error_detail = {"code": code, "message": message}
    return dict(jsonrpc="2.0", id=rid, error=error_detail)
20
+
21
+
22
def handle_mcp_request(body, engine):
    """Dispatch one decoded JSON-RPC message and build the reply for it.

    Args:
        body: The decoded JSON-RPC message as a dict.
        engine: Object exposing ``search(...)`` and ``fetch(url)``; it is only
            touched for ``tools/call`` requests.

    Returns:
        A JSON-RPC response dict, a JSON-RPC error dict, or ``None`` when no
        reply must be written (the message is a notification).
    """
    rid = body.get("id")
    method = body.get("method", "")
    # An explicit `"params": null` must behave like an absent member instead
    # of crashing on `.get` below.
    params = body.get("params") or {}

    # JSON-RPC 2.0: a message without an "id" member is a notification and
    # must never be answered -- not even with "Method not found". The explicit
    # method check keeps the previous behavior for a (malformed)
    # notifications/initialized message that carries an id.
    if method == "notifications/initialized" or "id" not in body:
        return None

    if method == "initialize":
        return build_response(
            rid,
            {
                "protocolVersion": "2024-11-05",
                "capabilities": {"tools": {}},
                "serverInfo": {"name": PRODUCT_NAME, "version": "1.0.0"},
            },
        )
    if method == "ping":
        return build_response(rid, {})
    if method == "tools/list":
        return build_response(rid, {"tools": MCP_TOOLS})
    if method == "resources/list":
        return build_response(rid, {"resources": []})
    if method == "prompts/list":
        return build_response(rid, {"prompts": []})
    if method != "tools/call":
        return build_error(rid, -32601, f"Method not found: {method}")

    tool_name = params.get("name", "")
    # Same null-tolerance as for params: `"arguments": null` means "no arguments".
    args = params.get("arguments") or {}

    if tool_name == "web_search":
        # Diagnostics go to stderr; stdout is reserved for protocol messages.
        print(
            f"[search_server] args keys={list(args.keys())!r} "
            f"topic={args.get('topic')!r} "
            f"exclude_domains={args.get('exclude_domains')!r}",
            file=sys.stderr,
            flush=True,
        )
        result = engine.search(
            args.get("query", ""),
            args.get("max_results", 10),
            args.get("freshness"),
            topic=args.get("topic"),
            exclude_domains=args.get("exclude_domains"),
            include_domains=args.get("include_domains"),
        )
    elif tool_name == "web_fetch":
        result = engine.fetch(args.get("url", ""))
    else:
        return build_error(rid, -32601, f"Unknown tool: {tool_name}")

    # The tool result is delivered as a single pretty-printed JSON text item.
    return build_response(
        rid,
        {
            "content": [{"type": "text", "text": json.dumps(result, ensure_ascii=False, indent=2)}],
        },
    )
77
+
78
+
79
def main():
    """Run the Searchpin MCP server over stdio until stdin reaches EOF.

    Parses the ``--model`` command-line option, constructs the search engine,
    then processes stdin line by line: each non-empty line is decoded as one
    JSON-RPC message, dispatched through ``handle_mcp_request``, and any reply
    is written to stdout as a single JSON line. Status and error diagnostics
    go to stderr so they never mix with protocol output.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Searchpin MCP Server")
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="Embedding model name (default: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2)",
    )
    args = parser.parse_args()

    print(f"[{PRODUCT_NAME}] starting engine (stdio mode)...", file=sys.stderr, flush=True)

    engine = SearchEngine(
        model_name=args.model or DEFAULT_MODEL_NAME,
        max_workers=3,
    )

    print(f"[{PRODUCT_NAME}] engine ready, waiting for requests on stdin", file=sys.stderr, flush=True)

    def send(message):
        # One JSON document per line; flush so the client sees it immediately.
        sys.stdout.write(json.dumps(message, ensure_ascii=False) + "\n")
        sys.stdout.flush()

    try:
        for line in sys.stdin:
            line = line.strip()
            if not line:
                continue

            # Remembered per message so a failure can be reported against the
            # request that caused it rather than against id null.
            rid = None
            is_notification = False
            try:
                body = json.loads(line)
                if not isinstance(body, dict):
                    # Valid JSON that is not an object is an invalid request,
                    # not an internal server error.
                    send(build_error(None, -32600, "Invalid Request: expected a JSON object"))
                    continue
                rid = body.get("id")
                is_notification = "id" not in body
                result = handle_mcp_request(body, engine)
                if result is not None:
                    send(result)
            except json.JSONDecodeError as e:
                send(build_error(None, -32700, f"Parse error: {e}"))
            except Exception as e:
                # Top-level boundary: log the failure and keep serving.
                print(f"[{PRODUCT_NAME}] error: {e}", file=sys.stderr, flush=True)
                # Notifications must never be answered, even when they fail.
                if not is_notification:
                    send(build_error(rid, -32603, str(e)))
    finally:
        # Release engine resources even if the loop exits through an
        # exception or interrupt.
        engine.close()
122
+
123
+
124
# Start the stdio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
@@ -0,0 +1,17 @@
1
+ """
2
+ Searchpin — Self-hosted web search for AI agents.
3
+
4
+ Usage:
5
+ from searchpin import SearchEngine
6
+
7
+ engine = SearchEngine()
8
+ results = engine.search("your query")
9
+ page = engine.fetch("https://example.com/article")
10
+ engine.close()
11
+ """
12
+
13
+ from searchpin.config import DEFAULT_MODEL_NAME, PRODUCT_NAME
14
+ from searchpin.engine import MCP_TOOLS, SearchEngine
15
+
16
+ __version__ = "1.0.0"
17
+ __all__ = ["SearchEngine", "MCP_TOOLS", "PRODUCT_NAME", "DEFAULT_MODEL_NAME"]
@@ -0,0 +1,11 @@
1
+ """Entry point for python -m searchpin — starts the MCP stdio server.
2
+
3
+ Usage:
4
+ python -m searchpin # Start with defaults
5
+ python -m searchpin --model ... # Custom embedding model
6
+ """
7
+
8
+ from search_server import main
9
+
10
# Running the package as a script delegates to main() imported from search_server.
if __name__ == "__main__":
    main()