bits-bie 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bie/auth/__init__.py ADDED
@@ -0,0 +1,255 @@
1
+ """
2
+ Enterprise Authentication & Authorization
3
+ ===========================================
4
+ SSO (OAuth2/OIDC), JWT session tokens, API-key tenancy, and
5
+ role-based access control (RBAC) for the v1.0 Enterprise tier.
6
+
7
+ Components:
8
+ - ``APIKeyStore`` — per-tenant API keys with tier + quota
9
+ - ``JWTManager`` — issue/verify short-lived session JWTs (post-SSO)
10
+ - ``OIDCConfig`` — OIDC provider configuration (Okta/Azure AD/Google)
11
+ - ``RBAC`` — role → permission mapping
12
+ - ``require_role`` — FastAPI dependency factory
13
+
14
+ This module has zero hard dependency on a real IdP — ``OIDCConfig``
15
+ holds connection details, and ``verify_oidc_token`` validates tokens
16
+ issued by any standards-compliant OIDC provider via JWKS.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import time
22
+ import uuid
23
+ from dataclasses import dataclass, field
24
+ from enum import Enum
25
+ from typing import Optional
26
+
27
+ from jose import jwt, JWTError
28
+ from pydantic import BaseModel, Field
29
+
30
+ from bie.config import BIESettings, settings
31
+
32
+
33
+ # ── Roles & permissions (RBAC) ─────────────────────────────────────────────────
34
+
35
class Role(str, Enum):
    """Access roles used by the RBAC layer.

    Members subclass ``str``, so each one compares equal to its raw value
    (``Role.ADMIN == "admin"``) and can be rebuilt from it (``Role("admin")``).
    """

    # Search only.
    VIEWER = "viewer"
    # Search + agent + crawl.
    DEVELOPER = "developer"
    # All of the above + indices/update + webhooks.
    ADMIN = "admin"
    # All of the above + billing + tenant management.
    OWNER = "owner"
40
+
41
+
42
# Role -> granted permission strings. Each tier is built on top of the one
# below it; ``|`` creates a new set every time, so no two roles share a set.
_ROLE_PERMISSIONS: dict[Role, set[str]] = {}
_ROLE_PERMISSIONS[Role.VIEWER] = {"search:read"}
_ROLE_PERMISSIONS[Role.DEVELOPER] = _ROLE_PERMISSIONS[Role.VIEWER] | {
    "agent:read", "crawl:write", "feedback:write",
}
_ROLE_PERMISSIONS[Role.ADMIN] = _ROLE_PERMISSIONS[Role.DEVELOPER] | {
    "indices:write", "webhooks:write", "metrics:read", "kg:read",
}
_ROLE_PERMISSIONS[Role.OWNER] = _ROLE_PERMISSIONS[Role.ADMIN] | {
    "tenant:manage", "billing:manage",
}
55
+
56
+
57
class RBAC:
    """Stateless lookups against the module-level role -> permission table."""

    @staticmethod
    def has_permission(role: Role, permission: str) -> bool:
        """Return True when ``role`` has been granted ``permission``.

        A role missing from the table is treated as having no permissions.
        """
        return permission in _ROLE_PERMISSIONS.get(role, ())

    @staticmethod
    def permissions_for(role: Role) -> set[str]:
        """Return the permissions granted to ``role`` (empty for unknown roles).

        The result is a fresh set: callers may mutate it freely without
        altering the shared permission table used for every other check.
        """
        return set(_ROLE_PERMISSIONS.get(role, ()))
65
+
66
+
67
+ # ── Tenant / API key model ──────────────────────────────────────────────────────
68
+
69
class PricingTier(str, Enum):
    """Commercial plan a tenant is subscribed to.

    Members subclass ``str`` and therefore compare equal to their raw value.
    """

    FREE = "free"
    STARTUP = "startup"
    BUSINESS = "business"
    ENTERPRISE = "enterprise"
74
+
75
+
76
# Monthly query allowance per pricing tier. A negative value means the tier
# is not capped (see the ``monthly_quota >= 0`` guard in the key store).
_TIER_QUOTAS: dict[PricingTier, int] = dict(
    (
        (PricingTier.FREE, 50_000),  # queries / month
        (PricingTier.STARTUP, 1_000_000),
        (PricingTier.BUSINESS, 10_000_000),
        (PricingTier.ENTERPRISE, -1),  # unlimited
    )
)
82
+
83
+
84
def _new_tenant_id() -> str:
    """Build a tenant id: ``tn_`` followed by 12 hex chars of a random UUID."""
    return f"tn_{uuid.uuid4().hex[:12]}"


@dataclass
class Tenant:
    """A customer account that owns API keys."""

    # Generated per instance when not supplied.
    tenant_id: str = field(default_factory=_new_tenant_id)
    name: str = ""
    tier: PricingTier = PricingTier.FREE
    region: str = "us-east-1"
    sso_enabled: bool = False
    oidc_issuer: Optional[str] = None
    # Creation time as a Unix timestamp (``time.time()``).
    created_at: float = field(default_factory=time.time)
93
+
94
+
95
@dataclass
class APIKeyRecord:
    """One API key together with its owner, role and usage counters."""

    api_key: str
    tenant_id: str
    role: Role = Role.DEVELOPER
    # Requests allowed per period; a negative value disables the cap.
    monthly_quota: int = -1
    # Requests counted since ``period_start``.
    requests_this_month: int = 0
    # Unix timestamp at which the current counting period began.
    period_start: float = field(default_factory=time.time)
    # Inactive keys are rejected by the key store.
    active: bool = True
104
+
105
+
106
class APIKeyStore:
    """
    In-memory multi-tenant API key store with quota tracking.
    Swap for a Postgres/DynamoDB-backed implementation in production —
    the interface (`validate`, `record_usage`, `create_key`) stays the same.

    SECURITY NOTE: by default every instance is seeded with the well-known
    key ``dev-key-12345`` carrying the OWNER role and no quota. Pass
    ``seed_dev_key=False`` for any deployment reachable by untrusted clients.
    """

    # Length of one quota period in seconds (30 days).
    _PERIOD_SECONDS = 30 * 86400

    def __init__(self, seed_dev_key: bool = True):
        """Create an empty store, optionally seeded with the development key.

        Args:
            seed_dev_key: When True (the default, kept for backward
                compatibility) register the hard-coded development key.
        """
        self._tenants: dict[str, Tenant] = {}
        self._keys: dict[str, APIKeyRecord] = {}
        if seed_dev_key:
            self._seed_dev_key()

    def _seed_dev_key(self) -> None:
        """Register an ENTERPRISE ``dev-tenant`` and its unlimited OWNER key."""
        tenant = Tenant(name="dev-tenant", tier=PricingTier.ENTERPRISE)
        self._tenants[tenant.tenant_id] = tenant
        self._keys["dev-key-12345"] = APIKeyRecord(
            api_key="dev-key-12345",
            tenant_id=tenant.tenant_id,
            role=Role.OWNER,
            monthly_quota=-1,
        )

    def create_tenant(self, name: str, tier: PricingTier, region: str = "us-east-1") -> Tenant:
        """Create, store and return a new tenant."""
        tenant = Tenant(name=name, tier=tier, region=region)
        self._tenants[tenant.tenant_id] = tenant
        return tenant

    def create_key(self, tenant_id: str, role: Role = Role.DEVELOPER) -> APIKeyRecord:
        """Issue a new API key for an existing tenant.

        The key's monthly quota is taken from the tenant's pricing tier.

        Raises:
            ValueError: If ``tenant_id`` is not a known tenant.
        """
        # Local import keeps this block self-contained; ``secrets`` is the
        # standard-library source intended for credential material.
        import secrets

        tenant = self._tenants.get(tenant_id)
        if tenant is None:
            raise ValueError(f"Unknown tenant {tenant_id}")
        key = APIKeyRecord(
            # Same shape as before: "bie_" + 32 lowercase hex characters.
            api_key=f"bie_{secrets.token_hex(16)}",
            tenant_id=tenant_id,
            role=role,
            monthly_quota=_TIER_QUOTAS[tenant.tier],
        )
        self._keys[key.api_key] = key
        return key

    def validate(self, api_key: str) -> tuple[APIKeyRecord, Tenant] | None:
        """Return ``(record, tenant)`` for a usable key, otherwise None.

        A key is rejected when it is unknown, inactive, or its tenant no
        longer exists. Validation also rolls the quota period over if due.
        """
        record = self._keys.get(api_key)
        if record is None or not record.active:
            return None
        tenant = self._tenants.get(record.tenant_id)
        if tenant is None:
            return None
        self._maybe_reset_period(record)
        return record, tenant

    def record_usage(self, api_key: str) -> bool:
        """Count one request against ``api_key``.

        Returns False if the key is unknown, inactive, or its quota is
        exhausted; in those cases the counter is left untouched.
        """
        record = self._keys.get(api_key)
        # Deactivated keys must not accrue usage, consistent with validate().
        if record is None or not record.active:
            return False
        self._maybe_reset_period(record)
        if record.monthly_quota >= 0 and record.requests_this_month >= record.monthly_quota:
            return False
        record.requests_this_month += 1
        return True

    def _maybe_reset_period(self, record: APIKeyRecord) -> None:
        """Zero the usage counter once more than 30 days have elapsed."""
        # Read the clock once so the comparison and the new period start agree.
        now = time.time()
        if now - record.period_start > self._PERIOD_SECONDS:  # 30-day rolling period
            record.requests_this_month = 0
            record.period_start = now

    def quota_status(self, api_key: str) -> dict:
        """Return quota, usage and remaining allowance for ``api_key``.

        Returns an empty dict for an unknown key. ``remaining`` is the string
        ``"unlimited"`` for uncapped keys and a non-negative int otherwise.
        """
        record = self._keys.get(api_key)
        if record is None:
            return {}
        return {
            "quota": record.monthly_quota,
            "used": record.requests_this_month,
            "remaining": (
                "unlimited" if record.monthly_quota < 0
                else max(0, record.monthly_quota - record.requests_this_month)
            ),
        }
186
+
187
+
188
+ # ── JWT session management (post-SSO) ───────────────────────────────────────────
189
+
190
class JWTManager:
    """
    Creates and checks the short-lived session JWTs handed out after a
    successful SSO/OIDC login. These back browser dashboard sessions; API
    traffic authenticates with API keys (`APIKeyStore`) instead.
    """

    def __init__(self, cfg: BIESettings = settings, ttl_seconds: int = 3600):
        """Capture the signing secret from ``cfg`` and the token lifetime."""
        self._algorithm = "HS256"
        self._ttl = ttl_seconds
        self._secret = cfg.secret_key

    def issue(self, subject: str, tenant_id: str, role: Role) -> str:
        """Return a signed token for ``subject`` valid for the configured TTL."""
        issued_at = int(time.time())
        claims = dict(
            sub=subject,
            tenant_id=tenant_id,
            role=role.value,
            iat=issued_at,
            exp=issued_at + self._ttl,
        )
        return jwt.encode(claims, self._secret, algorithm=self._algorithm)

    def verify(self, token: str) -> dict | None:
        """Return the decoded claims, or None when decoding raises ``JWTError``."""
        try:
            claims = jwt.decode(token, self._secret, algorithms=[self._algorithm])
        except JWTError:
            return None
        return claims
218
+
219
+
220
+ # ── OIDC / SSO configuration ─────────────────────────────────────────────────────
221
+
222
class OIDCConfig(BaseModel):
    """
    Settings describing one enterprise SSO provider
    (Okta, Azure AD, Google Workspace, OneLogin, etc).
    Provider-issued tokens are checked via JWKS against `jwks_uri`,
    so no provider-specific code is needed.
    """

    # Expected ``iss`` claim of incoming tokens.
    issuer: str
    client_id: str
    # Location of the provider's key set; fetching it is left to the caller.
    jwks_uri: str
    # Expected ``aud`` claim of incoming tokens.
    audience: str
    # Accepted signing algorithms; a fresh ``["RS256"]`` list per instance.
    algorithms: list[str] = Field(default_factory=lambda: ["RS256"])
235
+
236
+
237
async def verify_oidc_token(
    token: str,
    oidc: OIDCConfig,
    jwks_keys: list[dict],
    access_token: str | None = None,
) -> dict | None:
    """
    Verifies an OIDC ID token against the provider's JWKS.
    `jwks_keys` should be fetched from `oidc.jwks_uri` and cached
    by the caller (e.g. refreshed hourly).

    Args:
        token: The encoded ID token.
        oidc: Provider settings supplying issuer, audience and algorithms.
        jwks_keys: JWK dicts; the one whose ``kid`` matches the token header
            is used for signature verification.
        access_token: Optional access token issued alongside the ID token.
            When given, the ID token's ``at_hash`` claim is checked against it.

    Returns:
        The decoded claims, or None when no key matches or validation fails.
    """
    try:
        kid = jwt.get_unverified_header(token).get("kid")
        key = next((k for k in jwks_keys if k.get("kid") == kid), None)
        if key is None:
            return None
        return jwt.decode(
            token,
            key,
            algorithms=oidc.algorithms,
            audience=oidc.audience,
            issuer=oidc.issuer,
            access_token=access_token,
            # python-jose rejects any token carrying ``at_hash`` when no access
            # token is supplied; only enforce the claim when we can check it,
            # otherwise valid provider tokens would be refused.
            options={"verify_at_hash": access_token is not None},
        )
    except JWTError:
        return None
bie/chunker.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Lightweight text chunker — splits cleaned document text into
3
+ paragraph/section-sized chunks for indexing (PRD Module 8: Context Builder).
4
+
5
+ No heavy NLP deps; sentence/paragraph aware, with overlap.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+
12
+ from bie.models import Chunk, Document
13
+
14
+ _PARA_SPLIT = re.compile(r"\n\s*\n+")
15
+ _SENT_SPLIT = re.compile(r"(?<=[.!?])\s+")
16
+
17
+
18
def _hard_split(text: str, limit: int) -> list[str]:
    """Cut ``text`` into consecutive non-empty pieces of at most ``limit`` chars."""
    pieces = (text[i:i + limit].strip() for i in range(0, len(text), limit))
    return [piece for piece in pieces if piece]


def chunk_document(doc: Document, chunk_size: int = 800, overlap: int = 100) -> list[Chunk]:
    """Split a document's text into overlapping chunks.

    Strategy:
    1. Split on paragraph boundaries.
    2. Greedily pack paragraphs into chunks up to ``chunk_size`` chars.
    3. If a single paragraph exceeds ``chunk_size``, split it by sentence;
       a sentence that is itself too long is cut at ``chunk_size`` chars.
    4. Apply a small character-overlap between consecutive chunks so
       retrieval doesn't lose context at boundaries. The overlap is trimmed
       when needed so that no chunk exceeds ``chunk_size``.

    Offsets passed to the chunks are running positions in the stream of
    emitted chunk text, not indices into ``doc.text`` (paragraphs are
    stripped and re-joined with a blank line while packing).

    Args:
        doc: Document whose ``text`` is chunked.
        chunk_size: Maximum chunk length in characters; must be positive.
        overlap: Characters of the previous chunk repeated at the start of
            the next one. Clamped to the range ``0 .. chunk_size - 1``.

    Returns:
        The chunks in document order; empty when the text is empty/blank.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of characters")
    # A negative overlap would slice from the wrong end, and an overlap as
    # large as the chunk would make every chunk carry all previous text.
    overlap = min(max(overlap, 0), chunk_size - 1)

    text = (doc.text or "").strip()
    if not text:
        return []

    paragraphs = [p.strip() for p in _PARA_SPLIT.split(text) if p.strip()]
    if not paragraphs:
        paragraphs = [text]

    # Phase 1: reduce the text to units that each fit in one chunk.
    units: list[str] = []
    for para in paragraphs:
        if len(para) <= chunk_size:
            units.append(para)
            continue
        buf = ""
        for sent in _SENT_SPLIT.split(para):
            if len(sent) > chunk_size:
                # No sentence boundary to use: flush what we have, then cut
                # the run at the size limit so the unit stays bounded.
                if buf:
                    units.append(buf)
                    buf = ""
                units.extend(_hard_split(sent, chunk_size))
            elif len(buf) + len(sent) + 1 <= chunk_size:
                buf = f"{buf} {sent}".strip()
            else:
                if buf:
                    units.append(buf)
                buf = sent
        if buf:
            units.append(buf)

    # Phase 2: greedily pack units into chunks, carrying an overlap tail.
    chunks: list[Chunk] = []
    buf = ""
    offset = 0
    for unit in units:
        candidate = f"{buf}\n\n{unit}".strip() if buf else unit
        if len(candidate) <= chunk_size:
            buf = candidate
            continue

        if not buf:
            # Defensive: every unit fits on its own, so this is not expected.
            buf = unit
            continue

        chunks.append(_make_chunk(doc, buf, offset))
        # Leave room for the separator ("\n\n") and the incoming unit.
        room = chunk_size - len(unit) - 2
        keep = min(overlap, room)
        tail = buf[-keep:].strip() if keep > 0 else ""
        # The next chunk starts where the retained tail begins.
        offset += len(buf) - len(tail)
        buf = f"{tail}\n\n{unit}" if tail else unit

    if buf:
        chunks.append(_make_chunk(doc, buf, offset))

    return chunks
74
+
75
+
76
def _make_chunk(doc: Document, text: str, start_offset: int) -> Chunk:
    """Wrap ``text`` in a ``Chunk`` tagged with the document's id, site and title.

    ``end_offset`` is ``start_offset`` plus the length of ``text``.
    """
    doc_id = doc.doc_id
    end_offset = start_offset + len(text)
    metadata = {"site": doc.site, "title": doc.title}
    return Chunk(
        doc_id=doc_id,
        text=text,
        start_offset=start_offset,
        end_offset=end_offset,
        metadata=metadata,
    )
bie/cli.py ADDED
@@ -0,0 +1,136 @@
1
+ """
2
+ BIE command-line interface.
3
+
4
+ Examples::
5
+
6
+ bie search "AI regulation 2026" --url https://example.com/news
7
+ bie crawl https://example.com --max-pages 20 --out docs.jsonl
8
+ bie serve --port 8000
9
+ bie mcp
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import sys
16
+
17
+ import click
18
+
19
+ from bie import __version__
20
+ from bie.config import BIESettings
21
+ from bie.engine import BIE
22
+
23
+
24
# Root command group; the docstring below doubles as the --help text, and
# --version reports the package's ``__version__``.
@click.group()
@click.version_option(__version__, prog_name="bie")
def cli() -> None:
    """BIE — BitSearch Intelligence Engine. Real-time web search & extraction for AI apps."""
28
+
29
+
30
@cli.command()
@click.argument("query")
@click.option("--url", "urls", multiple=True, required=True, help="Seed URL(s) to crawl & search")
@click.option("--top-k", default=10, show_default=True, help="Number of results to return")
@click.option("--max-pages", default=10, show_default=True, help="Max pages to crawl per seed URL")
@click.option("--no-embeddings", is_flag=True, help="Disable semantic/vector search (BM25 only)")
@click.option("--json", "as_json", is_flag=True, help="Output raw JSON")
def search(query: str, urls: tuple[str, ...], top_k: int, max_pages: int, no_embeddings: bool, as_json: bool) -> None:
    """Crawl URL(s) and search the freshly indexed content for QUERY."""
    seeds = list(urls)
    engine = BIE(BIESettings(max_pages=max_pages, use_embeddings=not no_embeddings))

    # Progress goes to stderr so stdout carries only the results.
    click.echo(f"Crawling {len(urls)} source(s)...", err=True)
    indexed = engine.crawl(seeds)
    click.echo(f"Indexed {indexed} document(s). Searching...", err=True)

    response = engine.search_full(query, top_k=top_k)

    if as_json:
        click.echo(response.model_dump_json(indent=2))
    elif not response.results:
        click.echo("No results found.")
    else:
        for rank, hit in enumerate(response.results, start=1):
            click.echo(f"\n{rank}. {hit.title}")
            click.echo(f" {hit.url}")
            click.echo(f" score={hit.score:.4f} trust={hit.trust_score:.2f}")
            click.echo(f" {hit.snippet}")
        click.echo(f"\n({response.took_ms} ms, {response.total_indexed_documents} docs indexed)")
61
+
62
+
63
@cli.command()
@click.argument("urls", nargs=-1, required=True)
@click.option("--max-pages", default=40, show_default=True)
@click.option("--max-depth", default=2, show_default=True)
@click.option("--out", "output", default=None, help="Write extracted documents as JSONL to this path")
def crawl(urls: tuple[str, ...], max_pages: int, max_depth: int, output: str | None) -> None:
    """Crawl URLS using the Bitscrape-powered spider and print/save extracted docs."""
    cfg = BIESettings(max_pages=max_pages, max_depth=max_depth, use_embeddings=False)
    documents = BIE(cfg).crawler.crawl(list(urls))

    if not output:
        # No output path: print a one-line JSON summary per document.
        for doc in documents:
            summary = {"url": doc.url, "title": doc.title, "chars": len(doc.text)}
            click.echo(json.dumps(summary))
        click.echo(f"\n{len(documents)} document(s) crawled.", err=True)
        return

    # Output path given: dump every document as one JSON object per line.
    with open(output, "w", encoding="utf-8") as sink:
        for doc in documents:
            sink.write(doc.model_dump_json() + "\n")
    click.echo(f"Wrote {len(documents)} document(s) to {output}")
83
+
84
+
85
@cli.command()
@click.option("--host", default=None, help="Bind host (default from settings / 0.0.0.0)")
@click.option("--port", default=None, type=int, help="Bind port (default from settings / 8000)")
@click.option("--reload", is_flag=True, help="Auto-reload on code changes (dev only)")
def serve(host: str | None, port: int | None, reload: bool) -> None:
    """Run the BIE REST API server (FastAPI + Uvicorn)."""
    try:
        import uvicorn
    except ImportError:
        click.echo("uvicorn is required: pip install 'bits-bie[server]'", err=True)
        sys.exit(1)

    settings = BIESettings()
    # Fall back to settings only when the option was omitted, so an explicit
    # falsy value such as ``--port 0`` is honoured instead of being replaced.
    uvicorn.run(
        "bie.server:app",
        host=settings.host if host is None else host,
        port=settings.port if port is None else port,
        reload=reload,
    )
104
+
105
+
106
@cli.command()
def mcp() -> None:
    """Run BIE as a Model Context Protocol (MCP) server over stdio.

    Add to your MCP client config (e.g. Claude Desktop) as a command:

    \b
    {
      "mcpServers": {
        "bie": {
          "command": "bie",
          "args": ["mcp"]
        }
      }
    }
    """
    # The MCP server lives behind an optional extra; import it lazily so the
    # rest of the CLI works without it.
    try:
        from bie.mcp.server import run_mcp_server
    except ImportError:
        click.echo("MCP support requires: pip install 'bits-bie[mcp]'", err=True)
        sys.exit(1)
    else:
        run_mcp_server()
129
+
130
+
131
def main() -> None:
    """Entry point: hand control to the click command group."""
    cli()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
bie/client.py ADDED
@@ -0,0 +1,214 @@
1
+ """
2
+ BIE Python SDK — High-level client
3
+ ====================================
4
+ Use this in your own AI applications to search BIE programmatically.
5
+
6
+ Example::
7
+
8
+ import asyncio
9
+ from bie import BIEClient
10
+
11
+ async def main():
12
+ async with BIEClient(base_url="http://localhost:8000", api_key="my-key") as client:
13
+ # Simple hybrid search
14
+ resp = await client.search("latest AI research 2026", top_k=5)
15
+ for r in resp.results:
16
+ print(r.rank, r.title, r.url, r.trust_score)
17
+
18
+ # RAG: grounded LLM answer with citations
19
+ answer = await client.agent_query("What happened in TSMC Q2 2026?")
20
+ print(answer.answer)
21
+ for c in answer.citations:
22
+ print(f" [{c.index}] {c.url}")
23
+
24
+ # On-demand crawl
25
+ await client.crawl_url("https://example.com/new-article")
26
+
27
+ asyncio.run(main())
28
+
29
+ Sync wrapper::
30
+
31
+ from bie.client import BIEClientSync
32
+
33
+ client = BIEClientSync(base_url="http://localhost:8000", api_key="my-key")
34
+ resp = client.search("semiconductor supply chain")
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import asyncio
40
+ from typing import AsyncIterator
41
+
42
+ import httpx
43
+
44
+ from bie.models import (
45
+ AgentResponse,
46
+ CrawlRequest,
47
+ CrawlResponse,
48
+ HealthResponse,
49
+ SearchFilters,
50
+ SearchRequest,
51
+ SearchResponse,
52
+ )
53
+
54
+
55
class BIEClient:
    """
    Async HTTP client for the BIE REST API.
    Use as an async context manager or call `.close()` manually.
    """

    # Sent with every serialized-model body: httpx does not add a
    # Content-Type for raw ``content=`` payloads, so declare it explicitly
    # rather than relying on the server to guess the body is JSON.
    _JSON_HEADERS = {"Content-Type": "application/json"}

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: str = "dev-key",
        timeout: float = 30.0,
    ):
        """Create the underlying ``httpx.AsyncClient``.

        Args:
            base_url: API root; a trailing slash is removed.
            api_key: Value sent in the ``X-API-Key`` header of every request.
            timeout: Timeout handed to httpx, in seconds.
        """
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._client = httpx.AsyncClient(
            base_url=self._base_url,
            headers={"X-API-Key": api_key},
            timeout=timeout,
        )

    async def __aenter__(self) -> "BIEClient":
        return self

    async def __aexit__(self, *_) -> None:
        await self.close()

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    async def _post_model(self, path: str, model) -> httpx.Response:
        """POST a model's JSON dump to ``path``; raise on a non-success status."""
        resp = await self._client.post(
            path,
            content=model.model_dump_json(),
            headers=self._JSON_HEADERS,
        )
        resp.raise_for_status()
        return resp

    # ── Search ────────────────────────────────────────────────────────────────

    async def search(
        self,
        query: str,
        top_k: int = 10,
        filters: SearchFilters | None = None,
        use_reranker: bool = True,
    ) -> SearchResponse:
        """Hybrid BM25 + vector search. Returns ranked results.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        payload = SearchRequest(
            query=query,
            top_k=top_k,
            filters=filters or SearchFilters(),
            use_reranker=use_reranker,
        )
        resp = await self._post_model("/search", payload)
        return SearchResponse.model_validate(resp.json())

    async def search_stream(
        self, query: str, top_k: int = 10
    ) -> AsyncIterator[str]:
        """Stream search results as SSE events.

        Yields the text after the ``data: `` prefix of each matching line.
        """
        async with self._client.stream(
            "GET", "/search/stream", params={"query": query, "top_k": top_k}
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if line.startswith("data: "):
                    yield line[6:]

    # ── Agent / RAG ───────────────────────────────────────────────────────────

    async def agent_query(
        self,
        query: str,
        top_k: int = 10,
        filters: SearchFilters | None = None,
    ) -> AgentResponse:
        """Full RAG: retrieve → build context → LLM answer with citations.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        payload = SearchRequest(
            query=query,
            top_k=top_k,
            filters=filters or SearchFilters(),
        )
        resp = await self._post_model("/agent/query", payload)
        return AgentResponse.model_validate(resp.json())

    async def agent_stream(
        self, query: str, top_k: int = 10
    ) -> AsyncIterator[str]:
        """Stream LLM tokens via SSE.

        Yields each ``data: `` payload except the ``[DONE]`` sentinel.
        """
        async with self._client.stream(
            "GET", "/agent/stream", params={"query": query, "top_k": top_k}
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if line.startswith("data: "):
                    data = line[6:]
                    if data != "[DONE]":
                        yield data

    # ── Crawler ───────────────────────────────────────────────────────────────

    async def crawl_url(self, url: str, priority: int = 5) -> CrawlResponse:
        """Trigger an on-demand crawl + index of a single URL."""
        payload = CrawlRequest(url=url, priority=priority)
        resp = await self._post_model("/crawl/url", payload)
        return CrawlResponse.model_validate(resp.json())

    async def crawl_batch(self, urls: list[str]) -> dict:
        """Batch crawl up to 50 URLs."""
        # ``json=`` lets httpx serialize the list and set the content type.
        resp = await self._client.post("/crawl/batch", json=urls)
        resp.raise_for_status()
        return resp.json()

    # ── Feedback ──────────────────────────────────────────────────────────────

    async def feedback(self, url: str, positive: bool) -> None:
        """Send thumbs-up / thumbs-down to improve trust scoring."""
        # The flag travels as a query parameter spelled "true"/"false".
        resp = await self._client.post(
            "/feedback", params={"url": url, "positive": str(positive).lower()}
        )
        resp.raise_for_status()

    # ── Ops ───────────────────────────────────────────────────────────────────

    async def health(self) -> HealthResponse:
        """Fetch ``/health`` and parse it into a ``HealthResponse``."""
        resp = await self._client.get("/health")
        resp.raise_for_status()
        return HealthResponse.model_validate(resp.json())

    async def metrics(self) -> dict:
        """Fetch ``/metrics`` and return the decoded JSON body."""
        resp = await self._client.get("/metrics")
        resp.raise_for_status()
        return resp.json()
183
+
184
+
185
+ # ── Sync wrapper ───────────────────────────────────────────────────────────────
186
+
187
class BIEClientSync:
    """
    Synchronous wrapper around BIEClient.

    Each call drives the wrapped coroutine to completion on a private event
    loop created in ``__init__``. Use as a context manager, or call
    ``close()`` when done; ``close()`` may be called more than once.
    """

    def __init__(self, **kwargs):
        """Build the wrapped ``BIEClient`` (``kwargs`` are forwarded to it)."""
        self._async_client = BIEClient(**kwargs)
        self._loop = asyncio.new_event_loop()

    def __enter__(self) -> "BIEClientSync":
        return self

    def __exit__(self, *_) -> None:
        self.close()

    def _run(self, coro):
        """Block until ``coro`` finishes on the private loop; return its result."""
        return self._loop.run_until_complete(coro)

    def search(self, query: str, **kwargs) -> SearchResponse:
        """Blocking form of ``BIEClient.search``."""
        return self._run(self._async_client.search(query, **kwargs))

    def agent_query(self, query: str, **kwargs) -> AgentResponse:
        """Blocking form of ``BIEClient.agent_query``."""
        return self._run(self._async_client.agent_query(query, **kwargs))

    def crawl_url(self, url: str, **kwargs) -> CrawlResponse:
        """Blocking form of ``BIEClient.crawl_url``."""
        return self._run(self._async_client.crawl_url(url, **kwargs))

    def crawl_batch(self, urls: list[str]) -> dict:
        """Blocking form of ``BIEClient.crawl_batch``."""
        return self._run(self._async_client.crawl_batch(urls))

    def feedback(self, url: str, positive: bool) -> None:
        """Blocking form of ``BIEClient.feedback``."""
        return self._run(self._async_client.feedback(url, positive))

    def health(self) -> HealthResponse:
        """Blocking form of ``BIEClient.health``."""
        return self._run(self._async_client.health())

    def metrics(self) -> dict:
        """Blocking form of ``BIEClient.metrics``."""
        return self._run(self._async_client.metrics())

    def close(self):
        """Close the wrapped client and the private loop; safe to repeat."""
        if self._loop.is_closed():
            return
        try:
            self._run(self._async_client.close())
        finally:
            # Release the loop even if closing the HTTP client failed.
            self._loop.close()