exa-search-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
exa_cli/__init__.py ADDED
File without changes
exa_cli/main.py ADDED
@@ -0,0 +1,222 @@
1
+ import argparse
2
+ import json
3
+ import os
4
+ import re
5
+ import sys
6
+ import textwrap
7
+
8
+ from exa_py import Exa
9
+ from exa_py.api import ContentsOptions
10
+
11
# Maximum number of characters of page text printed per result when the
# full-text view is requested.
TEXT_PREVIEW_LEN = 2000

# Values accepted by the --category flag of the search command.
CATEGORIES = [
    "news",
    "tweet",
    "github",
    "paper",
    "company",
    "research paper",
    "financial report",
    "personal site",
    "pdf",
    "linkedin profile",
]
18
+
19
+
20
def _client() -> Exa:
    """Build an Exa client from the ``EXA_API_KEY`` environment variable.

    Terminates the process via ``sys.exit`` with an explanatory message when
    the variable is missing or empty.
    """
    api_key = os.environ.get("EXA_API_KEY")
    if api_key:
        return Exa(api_key=api_key)
    sys.exit("EXA_API_KEY not set. Export your key: export EXA_API_KEY=your-key")
25
+
26
+
27
+ def _dump_json(data) -> None:
28
+ if hasattr(data, "__dict__"):
29
+ print(json.dumps(data.__dict__, default=str, ensure_ascii=False, indent=2))
30
+ else:
31
+ print(json.dumps(data, default=str, ensure_ascii=False, indent=2))
32
+
33
+
34
+ def _meta(r) -> str:
35
+ parts = []
36
+ if getattr(r, "published_date", None):
37
+ parts.append(r.published_date[:10])
38
+ if getattr(r, "author", None):
39
+ parts.append(r.author)
40
+ return " · ".join(parts) if parts else ""
41
+
42
+
43
+ def _clean(text: str) -> str:
44
+ text = re.sub(r"(\[\.\.\.\]\s*){2,}", "[...]\n", text)
45
+ lines = [ln for ln in text.splitlines() if not re.fullmatch(r"\s*\d{0,3}\s*", ln)]
46
+ return "\n".join(lines).strip()
47
+
48
+
49
+ def _snippet(r, full_text: bool = False) -> str:
50
+ text = getattr(r, "text", None) or ""
51
+ highlights = getattr(r, "highlights", None) or []
52
+
53
+ if full_text and text:
54
+ preview = text[:TEXT_PREVIEW_LEN]
55
+ tail = "…" if len(text) > TEXT_PREVIEW_LEN else ""
56
+ return textwrap.fill(preview + tail, width=100)
57
+
58
+ if highlights:
59
+ parts = []
60
+ for h in highlights:
61
+ cleaned = _clean(h.strip())
62
+ if cleaned:
63
+ parts.append(textwrap.fill(cleaned, width=100))
64
+ return "\n\n".join(parts)
65
+
66
+ if text:
67
+ preview = text.strip()[:600]
68
+ return textwrap.fill(preview + ("…" if len(text) > 600 else ""), width=100)
69
+
70
+ return ""
71
+
72
+
73
+ def _print_results(results, response, full_text: bool = False) -> None:
74
+ for i, r in enumerate(results, 1):
75
+ title = getattr(r, "title", None) or "(no title)"
76
+ url = getattr(r, "url", "") or getattr(r, "id", "")
77
+ meta = _meta(r)
78
+ snippet = _snippet(r, full_text)
79
+
80
+ print(f"[{i}] {title}")
81
+ print(f" {url}")
82
+ if meta:
83
+ print(f" {meta}")
84
+ if snippet:
85
+ indented = "\n".join(" " + line for line in snippet.splitlines())
86
+ print(indented)
87
+ print()
88
+
89
+ cost = getattr(response, "cost_dollars", None)
90
+ t = getattr(response, "search_time", None)
91
+ cost_str = f"${cost.total:.4f}" if cost and hasattr(cost, "total") else ""
92
+ time_str = f"{t/1000:.1f}s" if t else ""
93
+ footer = " · ".join(filter(None, [f"{len(results)} results", cost_str, time_str]))
94
+ print(f"── {footer} ──")
95
+
96
+
97
+ def _split_domains(value: str | None) -> list[str] | None:
98
+ if not value:
99
+ return None
100
+ return [d.strip() for d in value.split(",") if d.strip()]
101
+
102
+
103
def search() -> None:
    """Entry point for the ``exa-search`` console script.

    Parses the command line, then either runs a search for the query or,
    when ``--similar`` is given, looks up pages similar to that URL (the
    ``type`` option is not forwarded in that case). The response is printed
    as JSON with ``--json`` and as a readable listing otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Exa web search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples:
exa-search "python async frameworks" -n 5
exa-search "latest AI news" --category news --start-date 2024-01-01
exa-search "rust web" --include-domain github.com,crates.io
exa-search "similar to this" --similar https://example.com
exa-search "query" --json | jq '.'
""",
    )
    parser.add_argument("query", nargs="?", help="search query (omit when using --similar)")
    parser.add_argument("-n", "--num-results", type=int, default=8)
    parser.add_argument("-t", "--type", choices=["auto", "keyword", "neural"], default="auto")
    parser.add_argument("--text", action="store_true", help="fetch and show full page text")
    parser.add_argument(
        "--category",
        choices=CATEGORIES,
        metavar="CATEGORY",
        help=f"filter by content type: {', '.join(CATEGORIES)}",
    )
    parser.add_argument("--start-date", metavar="YYYY-MM-DD", help="published on or after this date")
    parser.add_argument("--end-date", metavar="YYYY-MM-DD", help="published on or before this date")
    parser.add_argument(
        "--include-domain",
        metavar="DOMAINS",
        help="only include these domains (comma-separated)",
    )
    parser.add_argument(
        "--exclude-domain",
        metavar="DOMAINS",
        help="exclude these domains (comma-separated)",
    )
    parser.add_argument("--similar", metavar="URL", help="find pages similar to this URL")
    parser.add_argument("--json", action="store_true", help="raw JSON output")
    opts = parser.parse_args()

    if not (opts.query or opts.similar):
        parser.error("provide a query or --similar URL")

    exa = _client()

    # Highlights are always requested; page text only when --text is given.
    request: dict = {
        "num_results": opts.num_results,
        "type": opts.type,
        "contents": ContentsOptions(
            text=bool(opts.text),
            highlights={"num_sentences": 5, "highlights_per_url": 2},
        ),
    }

    # Optional filters are added only when the matching flag was supplied.
    passthrough = (
        ("category", opts.category),
        ("start_published_date", opts.start_date),
        ("end_published_date", opts.end_date),
    )
    for key, raw in passthrough:
        if raw:
            request[key] = raw

    domain_filters = (
        ("include_domains", opts.include_domain),
        ("exclude_domains", opts.exclude_domain),
    )
    for key, raw in domain_filters:
        if raw:
            request[key] = _split_domains(raw)

    if opts.similar:
        similar_request = dict(request)
        similar_request.pop("type")
        response = exa.find_similar(opts.similar, **similar_request)
    else:
        response = exa.search(opts.query, **request)

    if opts.json:
        _dump_json(response)
        return
    _print_results(response.results, response, full_text=opts.text)
167
+
168
+
169
def crawl() -> None:
    """Entry point for the ``exa-crawl`` console script.

    Requests the text content of a single URL, limited to ``--max-chars``
    characters, and prints it as JSON with ``--json`` or through the shared
    result printer in full-text mode otherwise.

    NOTE(review): the readable view goes through ``_print_results`` with
    ``full_text=True``; if that renderer caps the preview at
    ``TEXT_PREVIEW_LEN``, larger ``--max-chars`` values are only fully
    visible with ``--json`` — confirm this is intended.
    """
    parser = argparse.ArgumentParser(
        description="Exa URL crawl — extract full page content",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples:
exa-crawl https://example.com
exa-crawl https://arxiv.org/abs/2303.08774 -c 10000
exa-crawl https://example.com --json
""",
    )
    parser.add_argument("url")
    parser.add_argument(
        "-c",
        "--max-chars",
        type=int,
        default=5000,
        help="max characters to return (default: 5000)",
    )
    parser.add_argument("--json", action="store_true", help="raw JSON output")
    opts = parser.parse_args()

    exa = _client()
    text_options = {"max_characters": opts.max_chars}
    response = exa.get_contents([opts.url], text=text_options)

    if opts.json:
        _dump_json(response)
        return
    _print_results(response.results, response, full_text=True)
192
+
193
+
194
def research() -> None:
    """Entry point for the ``exa-research`` console script.

    Creates a research task for the given topic with the selected model.
    With ``--json`` the created task is dumped as JSON; otherwise a short
    summary (ID, model, status) is printed. This command only submits the
    task; it does not wait for or print the research result.
    """
    p = argparse.ArgumentParser(
        description="Exa deep research — AI-powered research task",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples:
exa-research "explain transformer attention mechanisms"
exa-research "quantum computing current state" --model exa-research-pro
exa-research "topic" --json
""",
    )
    p.add_argument("topic")
    p.add_argument("-m", "--model", default="exa-research",
                   choices=["exa-research", "exa-research-pro"])
    p.add_argument("--json", action="store_true", help="raw JSON output")
    args = p.parse_args()

    exa = _client()
    result = exa.research.create_task(instructions=args.topic, model=args.model)

    if args.json:
        _dump_json(result)
        return

    # Prefer ``id``, then ``task_id``; fall back to the object's string form.
    task_id = getattr(result, "id", None) or getattr(result, "task_id", str(result))
    status = getattr(result, "status", "submitted")
    print("Research task created")
    print(f" ID: {task_id}")
    print(f" Model: {args.model}")
    print(f" Status: {status}")
    # The package installs no status command (only exa-search, exa-crawl and
    # exa-research), so the hint must not point at one.
    print(f"\nFetch the result with the Exa research API using task ID {task_id}")
@@ -0,0 +1,194 @@
1
+ Metadata-Version: 2.4
2
+ Name: exa-search-cli
3
+ Version: 0.1.0
4
+ Summary: AI-agent friendly CLI for Exa — neural web search, URL crawling, and deep research from the terminal.
5
+ Project-URL: Homepage, https://github.com/nolan-vale/exa-cli
6
+ Project-URL: Repository, https://github.com/nolan-vale/exa-cli
7
+ Project-URL: Issues, https://github.com/nolan-vale/exa-cli/issues
8
+ Author: Nolan Vale
9
+ Maintainer: Nolan Vale Tools
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: ai-agents,automation,cli,developer-tools,exa,exa-search,llm-tools,search,search-automation,web-search
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
21
+ Classifier: Topic :: Utilities
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: exa-py>=1.0.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ <div align="center">
27
+
28
+ [中文](README.zh-CN.md) · [Русский](README.ru.md) · [Português](README.pt-BR.md) · [Español](README.es.md) · [日本語](README.ja.md) · [한국어](README.ko.md)
29
+
30
+ <!--
31
+ COVER IMAGE — generate with this prompt, save as docs/cover.png, then uncomment below.
32
+
33
+ Prompt (Midjourney / DALL-E 3 / Stable Diffusion XL):
34
+ "A sleek dark terminal window filled with glowing cyan and blue search results streaming
35
+ in real-time, abstract neural network nodes forming a luminous web in the background,
36
+ minimalist developer aesthetic, pure black background, neon accent colors,
37
+ wide cinematic banner, 2:1 aspect ratio, no text, no UI chrome"
38
+
39
+ <img src="docs/cover.png" alt="exa-cli" width="100%">
40
+ -->
41
+
42
+ # exa-cli
43
+
44
+ CLI for [Exa](https://exa.ai) — neural web search, URL crawling, and AI deep research from the terminal.
45
+
46
+ [![PyPI](https://img.shields.io/pypi/v/exa-search-cli?color=0ea5e9&label=PyPI)](https://pypi.org/project/exa-search-cli/)
47
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-0ea5e9.svg)](https://python.org)
48
+ [![License: MIT](https://img.shields.io/badge/license-MIT-0ea5e9.svg)](LICENSE)
49
+ [![Stars](https://img.shields.io/github/stars/nolan-vale/exa-cli?style=social)](https://github.com/nolan-vale/exa-cli)
50
+
51
+ </div>
52
+
53
+ ---
54
+
55
+ ## What it does
56
+
57
+ `exa-cli` wraps the [Exa API](https://exa.ai) in three terminal commands. Exa is a search API built for AI applications — it searches by meaning, not keywords, which means it finds relevant pages even when the exact words are not present in the content.
58
+
59
+ `exa-search` searches the web. `exa-crawl` extracts clean readable text from any URL without HTML. `exa-research` submits a deep research task where Exa AI reads the web and synthesizes a structured answer.
60
+
61
+ Every command outputs clean `--json` for use in scripts, pipelines, and AI agent workflows.
62
+
63
+ ## Who it is for
64
+
65
+ - Developers who want web search access from shell scripts and automation pipelines
66
+ - AI agent developers who need structured, parseable web search output
67
+ - Researchers collecting, filtering, and crawling web content programmatically
68
+ - Anyone using Claude Code, Codex, Cursor, or Windsurf who wants to give their agent web access
69
+
70
+ ## Features
71
+
72
+ - Neural (semantic) search — finds pages by meaning, not keyword matching
73
+ - Find pages similar to any URL
74
+ - Filter by content type: `news`, `tweet`, `github`, `research paper`, `pdf`, and more
75
+ - Filter by date range and domain
76
+ - Full page text extraction from any URL (no HTML)
77
+ - AI deep research tasks with synthesized answers
78
+ - Clean `--json` output for every command
79
+
80
+ ## Installation
81
+
82
+ ```bash
83
+ uv tool install exa-search-cli
84
+ ```
85
+
86
+ > No `uv`? Run `curl -LsSf https://astral.sh/uv/install.sh | sh`, or use `pip install exa-search-cli`.
87
+
88
+ ## Quick start
89
+
90
+ Get your API key at [exa.ai](https://exa.ai) (free tier available):
91
+
92
+ ```bash
93
+ export EXA_API_KEY=your-key-here
94
+ exa-search "how do transformers work" --category "research paper"
95
+ ```
96
+
97
+ ## Usage
98
+
99
+ ```bash
100
+ # Neural search
101
+ exa-search "vision language models 2025" -n 10
102
+
103
+ # Find similar pages to a URL
104
+ exa-search --similar https://github.com/astral-sh/uv
105
+
106
+ # Filter by content type and date
107
+ exa-search "AI papers" --category "research paper" --start-date 2025-01-01
108
+
109
+ # Only specific domains
110
+ exa-search "documentation" --include-domain docs.python.org,docs.rs
111
+
112
+ # Exclude noisy domains
113
+ exa-search "tutorial" --exclude-domain medium.com,dev.to
114
+
115
+ # Crawl a page, get clean text
116
+ exa-crawl https://example.com -c 8000
117
+
118
+ # Deep research task
119
+ exa-research "current state of quantum error correction"
120
+
121
+ # JSON output for pipelines
122
+ exa-search "topic" --json | jq -r '.results[].url'
123
+ ```
124
+
125
+ **All flags — `exa-search`:**
126
+
127
+ | Flag | Default | Description |
128
+ |---|---|---|
129
+ | `-n` / `--num-results` | `8` | Number of results |
130
+ | `-t` / `--type` | `auto` | `auto` · `keyword` · `neural` |
131
+ | `--text` | off | Fetch and show full page text |
132
+ | `--category` | — | `news` · `tweet` · `github` · `research paper` · `pdf` · `company` · `personal site` · `linkedin profile` · `financial report` |
133
+ | `--start-date` | — | Published on or after `YYYY-MM-DD` |
134
+ | `--end-date` | — | Published on or before `YYYY-MM-DD` |
135
+ | `--include-domain` | — | Comma-separated domains to include only |
136
+ | `--exclude-domain` | — | Comma-separated domains to exclude |
137
+ | `--similar` | — | Find pages similar to this URL |
138
+ | `--json` | off | Raw JSON output |
139
+
140
+ **All flags — `exa-crawl`:** `-c` / `--max-chars` (default `5000`), `--json`
141
+
142
+ **All flags — `exa-research`:** `-m` / `--model` (`exa-research` or `exa-research-pro`), `--json`
143
+
144
+ ## AI agent usage
145
+
146
+ `exa-cli` is stateless, read-only, and exits cleanly — designed to be called by AI coding assistants.
147
+
148
+ ```bash
149
+ # Search and extract URLs (most common agent pattern)
150
+ exa-search "topic" --json | jq -r '.results[].url'
151
+
152
+ # Search → crawl first result
153
+ exa-search "topic" --json \
154
+ | jq -r '.results[0].url' \
155
+ | xargs exa-crawl -c 6000
156
+
157
+ # Find similar pages to a reference URL
158
+ exa-search --similar https://example.com --json
159
+
160
+ # Deep research, get synthesized answer
161
+ exa-research "topic" --json
162
+ ```
163
+
164
+ JSON schema for `exa-search --json`:
165
+ ```json
166
+ {
167
+ "results": [
168
+ {
169
+ "title": "...",
170
+ "url": "...",
171
+ "published_date": "2025-01-15T00:00:00.000Z",
172
+ "author": "...",
173
+ "highlights": ["excerpt..."],
174
+ "text": "full text if --text was passed"
175
+ }
176
+ ]
177
+ }
178
+ ```
179
+
180
+ See [AGENTS.md](AGENTS.md) for full schemas, exit codes, and environment reference.
181
+
182
+ → [Full documentation](docs/USAGE.md)
183
+
184
+ ## Project metadata
185
+
186
+ - **Author:** Nolan Vale
187
+ - **Brand:** Nolan Vale Tools
188
+ - **Focus:** search automation, CLI workflows, AI-agent tooling, developer productivity
189
+ - **License:** MIT
190
+
191
+ ---
192
+
193
+ Built by [Nolan Vale](https://github.com/nolan-vale)
194
+ Part of **Nolan Vale Tools** — practical open-source utilities for search, automation, AI agents, and developer workflows.
@@ -0,0 +1,7 @@
1
+ exa_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ exa_cli/main.py,sha256=rKsVmBprIm_7IlyzqL2me0aglKpt4H4KzV0xOiY_wzc,7764
3
+ exa_search_cli-0.1.0.dist-info/METADATA,sha256=4QoGWmwoJ8nsJIZr6fh7r7vnsSiV5bH8hYlg9Cva-v8,6925
4
+ exa_search_cli-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
5
+ exa_search_cli-0.1.0.dist-info/entry_points.txt,sha256=cWblC0c1HncTveE30siqjM-OEW807vlXRyTHUiSyLt0,119
6
+ exa_search_cli-0.1.0.dist-info/licenses/LICENSE,sha256=iU6yWXwU1TUXTWL1SKS4m2eGnMBQ1EA6n4ZlRpGAm-c,1067
7
+ exa_search_cli-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ exa-crawl = exa_cli.main:crawl
3
+ exa-research = exa_cli.main:research
4
+ exa-search = exa_cli.main:search
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nolan Vale
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.