bits-bie 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bits_bie-0.2.0 → bits_bie-0.3.0}/.github/workflows/ci.yml +2 -2
- {bits_bie-0.2.0 → bits_bie-0.3.0}/PKG-INFO +3 -3
- {bits_bie-0.2.0 → bits_bie-0.3.0}/README.md +2 -2
- {bits_bie-0.2.0 → bits_bie-0.3.0}/pyproject.toml +1 -1
- bits_bie-0.2.0/bie/agents/__init__.py +0 -315
- bits_bie-0.2.0/bie/api/__init__.py +0 -457
- bits_bie-0.2.0/bie/auth/__init__.py +0 -255
- bits_bie-0.2.0/bie/client.py +0 -214
- bits_bie-0.2.0/bie/compliance/__init__.py +0 -472
- bits_bie-0.2.0/bie/context/__init__.py +0 -87
- bits_bie-0.2.0/bie/contradiction/__init__.py +0 -204
- bits_bie-0.2.0/bie/crawler/__init__.py +0 -325
- bits_bie-0.2.0/bie/gateway/__init__.py +0 -132
- bits_bie-0.2.0/bie/indexer/__init__.py +0 -376
- bits_bie-0.2.0/bie/kg/__init__.py +0 -394
- bits_bie-0.2.0/bie/regions/__init__.py +0 -236
- bits_bie-0.2.0/bie/retriever/__init__.py +0 -2
- bits_bie-0.2.0/bie/trust/__init__.py +0 -99
- bits_bie-0.2.0/bie/verifier/__init__.py +0 -216
- bits_bie-0.2.0/examples/quickstart.py +0 -131
- bits_bie-0.2.0/tests/__init__.py +0 -0
- bits_bie-0.2.0/tests/test_bie.py +0 -371
- bits_bie-0.2.0/tests/test_bie_v1.py +0 -708
- {bits_bie-0.2.0 → bits_bie-0.3.0}/.github/workflows/publish.yml +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/.gitignore +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/LICENSE +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/__init__.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/chunker.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/cli.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/config.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/crawler.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/engine.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/index.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/mcp/__init__.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/mcp/server.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/models.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/quicksearch.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/server.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/spiders/__init__.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/bie/spiders/generic.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/docs/API.md +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/examples/basic_search.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/examples/reusable_index.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/tests/test_chunker.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/tests/test_engine.py +0 -0
- {bits_bie-0.2.0 → bits_bie-0.3.0}/tests/test_index.py +0 -0
|
@@ -11,7 +11,7 @@ jobs:
|
|
|
11
11
|
runs-on: ubuntu-latest
|
|
12
12
|
strategy:
|
|
13
13
|
matrix:
|
|
14
|
-
python-version: ["3.
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
15
|
steps:
|
|
16
16
|
- uses: actions/checkout@v4
|
|
17
17
|
- uses: actions/setup-python@v5
|
|
@@ -24,4 +24,4 @@ jobs:
|
|
|
24
24
|
- name: Run tests
|
|
25
25
|
run: pytest -v
|
|
26
26
|
- name: Lint
|
|
27
|
-
run: ruff check bie tests
|
|
27
|
+
run: ruff check bie tests
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bits-bie
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: BitSearch Intelligence Engine — real-time, citation-backed web search & extraction for AI apps. Built on Bitscrape.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Sudharsansm/BIE
|
|
6
6
|
Project-URL: Repository, https://github.com/Sudharsansm/BIE
|
|
@@ -53,7 +53,7 @@ Description-Content-Type: text/markdown
|
|
|
53
53
|
**The fastest, simplest way to give any LLM, RAG pipeline, or AI agent
|
|
54
54
|
real-time, citation-backed web search and extraction.**
|
|
55
55
|
|
|
56
|
-
BIE crawls the live web (powered by [**
|
|
56
|
+
BIE crawls the live web (powered by [**BitS**](https://pypi.org/project/bitscrape/),
|
|
57
57
|
our high-performance async crawler), builds a hybrid **BM25 + semantic
|
|
58
58
|
vector** index in memory, and returns ranked, source-attributed results —
|
|
59
59
|
all from a single Python call, REST endpoint, CLI command, or
|
|
@@ -269,7 +269,7 @@ for Elasticsearch/Milvus-backed implementations behind the same
|
|
|
269
269
|
## Built on Bitscrape
|
|
270
270
|
|
|
271
271
|
BIE's crawling and extraction layer is powered by
|
|
272
|
-
[**
|
|
272
|
+
[**BitS**](https://github.com/Sudharsansm/Bitscrape)
|
|
273
273
|
(`pip install bitscrape`), our async, robots.txt-aware web scraping
|
|
274
274
|
framework — giving BIE high-performance, polite, production-grade crawling
|
|
275
275
|
out of the box.
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
**The fastest, simplest way to give any LLM, RAG pipeline, or AI agent
|
|
9
9
|
real-time, citation-backed web search and extraction.**
|
|
10
10
|
|
|
11
|
-
BIE crawls the live web (powered by [**
|
|
11
|
+
BIE crawls the live web (powered by [**BitS**](https://pypi.org/project/bitscrape/),
|
|
12
12
|
our high-performance async crawler), builds a hybrid **BM25 + semantic
|
|
13
13
|
vector** index in memory, and returns ranked, source-attributed results —
|
|
14
14
|
all from a single Python call, REST endpoint, CLI command, or
|
|
@@ -224,7 +224,7 @@ for Elasticsearch/Milvus-backed implementations behind the same
|
|
|
224
224
|
## Built on Bitscrape
|
|
225
225
|
|
|
226
226
|
BIE's crawling and extraction layer is powered by
|
|
227
|
-
[**
|
|
227
|
+
[**BitS**](https://github.com/Sudharsansm/Bitscrape)
|
|
228
228
|
(`pip install bitscrape`), our async, robots.txt-aware web scraping
|
|
229
229
|
framework — giving BIE high-performance, polite, production-grade crawling
|
|
230
230
|
out of the box.
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "bits-bie"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "BitSearch Intelligence Engine — real-time, citation-backed web search & extraction for AI apps. Built on Bitscrape."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -1,315 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
M07 — Multi-Agent Orchestrator
|
|
3
|
-
================================
|
|
4
|
-
Lead agent decomposes a query into sub-tasks (web search, KG lookup,
|
|
5
|
-
summarization, fact verification), runs sub-agents in parallel
|
|
6
|
-
(async fan-out) or sequentially (linear chain), and merges results
|
|
7
|
-
via a shared in-memory (or Redis-backed) memory store.
|
|
8
|
-
|
|
9
|
-
Usage::
|
|
10
|
-
|
|
11
|
-
from bie.agents import AgentOrchestrator
|
|
12
|
-
|
|
13
|
-
orch = AgentOrchestrator(retriever, kg, llm, fact_verifier)
|
|
14
|
-
result = await orch.run("Compare TSMC and Samsung's 2026 capex plans")
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import asyncio
|
|
20
|
-
import json
|
|
21
|
-
import logging
|
|
22
|
-
import time
|
|
23
|
-
import uuid
|
|
24
|
-
from dataclasses import dataclass, field
|
|
25
|
-
from enum import Enum
|
|
26
|
-
from typing import Any, Awaitable, Callable
|
|
27
|
-
|
|
28
|
-
from bie.config import BIESettings, settings
|
|
29
|
-
from bie.context import ContextBuilder
|
|
30
|
-
from bie.models import AgentResponse, Citation, SearchFilters, SearchResult
|
|
31
|
-
|
|
32
|
-
logger = logging.getLogger(__name__)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# ── Shared memory store ────────────────────────────────────────────────────────
|
|
36
|
-
|
|
37
|
-
class SharedMemory:
|
|
38
|
-
"""
|
|
39
|
-
Persists intermediate sub-agent findings across turns.
|
|
40
|
-
Default: in-memory dict. Set `redis_client` for Redis-backed
|
|
41
|
-
cross-process sharing (per PRD M07).
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
def __init__(self, redis_client: Any = None, ttl_seconds: int = 3600):
|
|
45
|
-
self._store: dict[str, dict[str, Any]] = {}
|
|
46
|
-
self._redis = redis_client
|
|
47
|
-
self._ttl = ttl_seconds
|
|
48
|
-
|
|
49
|
-
async def set(self, session_id: str, key: str, value: Any) -> None:
|
|
50
|
-
if self._redis is not None:
|
|
51
|
-
await self._redis.hset(f"bie:session:{session_id}", key, json.dumps(value))
|
|
52
|
-
await self._redis.expire(f"bie:session:{session_id}", self._ttl)
|
|
53
|
-
return
|
|
54
|
-
self._store.setdefault(session_id, {})[key] = value
|
|
55
|
-
|
|
56
|
-
async def get(self, session_id: str, key: str) -> Any:
|
|
57
|
-
if self._redis is not None:
|
|
58
|
-
raw = await self._redis.hget(f"bie:session:{session_id}", key)
|
|
59
|
-
return json.loads(raw) if raw else None
|
|
60
|
-
return self._store.get(session_id, {}).get(key)
|
|
61
|
-
|
|
62
|
-
async def get_all(self, session_id: str) -> dict[str, Any]:
|
|
63
|
-
if self._redis is not None:
|
|
64
|
-
raw = await self._redis.hgetall(f"bie:session:{session_id}")
|
|
65
|
-
return {k: json.loads(v) for k, v in raw.items()}
|
|
66
|
-
return dict(self._store.get(session_id, {}))
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# ── Token budget tracker ───────────────────────────────────────────────────────
|
|
70
|
-
|
|
71
|
-
class TokenBudget:
|
|
72
|
-
"""Per-agent / per-session token budget enforcement."""
|
|
73
|
-
|
|
74
|
-
def __init__(self, max_tokens: int):
|
|
75
|
-
self._max = max_tokens
|
|
76
|
-
self._used = 0
|
|
77
|
-
|
|
78
|
-
def consume(self, tokens: int) -> bool:
|
|
79
|
-
"""Returns False if consuming would exceed budget."""
|
|
80
|
-
if self._used + tokens > self._max:
|
|
81
|
-
return False
|
|
82
|
-
self._used += tokens
|
|
83
|
-
return True
|
|
84
|
-
|
|
85
|
-
@property
|
|
86
|
-
def remaining(self) -> int:
|
|
87
|
-
return max(0, self._max - self._used)
|
|
88
|
-
|
|
89
|
-
@property
|
|
90
|
-
def used(self) -> int:
|
|
91
|
-
return self._used
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
# ── Sub-task definitions ────────────────────────────────────────────────────────
|
|
95
|
-
|
|
96
|
-
class TaskType(str, Enum):
|
|
97
|
-
SEARCH_WEB = "search_web"
|
|
98
|
-
SEARCH_KG = "search_kg"
|
|
99
|
-
SUMMARIZE = "summarize"
|
|
100
|
-
VERIFY_FACT = "verify_fact"
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
@dataclass
|
|
104
|
-
class SubTask:
|
|
105
|
-
task_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
|
|
106
|
-
type: TaskType = TaskType.SEARCH_WEB
|
|
107
|
-
query: str = ""
|
|
108
|
-
depends_on: list[str] = field(default_factory=list)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
@dataclass
|
|
112
|
-
class SubTaskResult:
|
|
113
|
-
task_id: str
|
|
114
|
-
type: TaskType
|
|
115
|
-
output: Any
|
|
116
|
-
elapsed_ms: float
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
# ── Query decomposition ────────────────────────────────────────────────────────
|
|
120
|
-
|
|
121
|
-
class QueryDecomposer:
|
|
122
|
-
"""
|
|
123
|
-
Splits a complex query into sub-tasks.
|
|
124
|
-
Heuristic decomposition: detects "compare", "and", multi-entity
|
|
125
|
-
queries → fan-out search_web tasks per entity, plus a KG lookup
|
|
126
|
-
and a final summarize task. Production can swap this for an
|
|
127
|
-
LLM-based planner.
|
|
128
|
-
"""
|
|
129
|
-
|
|
130
|
-
_COMPARISON_WORDS = {"compare", "vs", "versus", "difference between"}
|
|
131
|
-
|
|
132
|
-
def decompose(self, query: str) -> list[SubTask]:
|
|
133
|
-
tasks: list[SubTask] = []
|
|
134
|
-
q_lower = query.lower()
|
|
135
|
-
|
|
136
|
-
# Always include a primary web search
|
|
137
|
-
primary = SubTask(type=TaskType.SEARCH_WEB, query=query)
|
|
138
|
-
tasks.append(primary)
|
|
139
|
-
|
|
140
|
-
# KG lookup for named-entity-like capitalized terms
|
|
141
|
-
import re
|
|
142
|
-
entities = re.findall(r"\b[A-Z][a-zA-Z]{2,}(?:\s+[A-Z][a-zA-Z]{2,})?\b", query)
|
|
143
|
-
if entities:
|
|
144
|
-
tasks.append(SubTask(type=TaskType.SEARCH_KG, query=" ".join(entities[:3])))
|
|
145
|
-
|
|
146
|
-
# Comparison → split into sub-searches per entity
|
|
147
|
-
if any(w in q_lower for w in self._COMPARISON_WORDS) and len(entities) >= 2:
|
|
148
|
-
for ent in entities[:2]:
|
|
149
|
-
tasks.append(SubTask(type=TaskType.SEARCH_WEB, query=f"{ent} {query}"))
|
|
150
|
-
|
|
151
|
-
# Final synthesis depends on all prior tasks
|
|
152
|
-
summarize = SubTask(
|
|
153
|
-
type=TaskType.SUMMARIZE,
|
|
154
|
-
query=query,
|
|
155
|
-
depends_on=[t.task_id for t in tasks],
|
|
156
|
-
)
|
|
157
|
-
tasks.append(summarize)
|
|
158
|
-
|
|
159
|
-
return tasks
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
# ── Orchestrator ────────────────────────────────────────────────────────────────
|
|
163
|
-
|
|
164
|
-
class AgentOrchestrator:
|
|
165
|
-
"""
|
|
166
|
-
Executes a multi-agent plan: decompose → fan-out sub-agents
|
|
167
|
-
(async) → merge → synthesize via LLM with fact verification.
|
|
168
|
-
"""
|
|
169
|
-
|
|
170
|
-
def __init__(
|
|
171
|
-
self,
|
|
172
|
-
retriever, # HybridRetriever
|
|
173
|
-
kg=None, # KnowledgeGraph
|
|
174
|
-
llm=None, # LLMGateway
|
|
175
|
-
fact_verifier=None, # FactVerifier
|
|
176
|
-
cfg: BIESettings = settings,
|
|
177
|
-
memory: SharedMemory | None = None,
|
|
178
|
-
):
|
|
179
|
-
self._retriever = retriever
|
|
180
|
-
self._kg = kg
|
|
181
|
-
self._llm = llm
|
|
182
|
-
self._fact_verifier = fact_verifier
|
|
183
|
-
self._cfg = cfg
|
|
184
|
-
self._decomposer = QueryDecomposer()
|
|
185
|
-
self._context_builder = ContextBuilder(cfg)
|
|
186
|
-
self._memory = memory or SharedMemory(ttl_seconds=cfg.redis_ttl_seconds)
|
|
187
|
-
|
|
188
|
-
async def run(
|
|
189
|
-
self,
|
|
190
|
-
query: str,
|
|
191
|
-
session_id: str | None = None,
|
|
192
|
-
top_k: int = 5,
|
|
193
|
-
mode: str = "async", # "async" (fan-out) | "sync" (linear chain)
|
|
194
|
-
token_budget: int = 4000,
|
|
195
|
-
) -> dict:
|
|
196
|
-
"""
|
|
197
|
-
Returns a dict with: answer, citations, sub_results, contradiction_flags,
|
|
198
|
-
latency_ms, mode, session_id.
|
|
199
|
-
"""
|
|
200
|
-
session_id = session_id or str(uuid.uuid4())
|
|
201
|
-
t0 = time.perf_counter()
|
|
202
|
-
budget = TokenBudget(token_budget)
|
|
203
|
-
|
|
204
|
-
tasks = self._decomposer.decompose(query)
|
|
205
|
-
logger.debug("Decomposed '%s' into %d sub-tasks", query, len(tasks))
|
|
206
|
-
|
|
207
|
-
# Separate the synthesis task (always last, depends on others)
|
|
208
|
-
sub_tasks = [t for t in tasks if t.type != TaskType.SUMMARIZE]
|
|
209
|
-
synth_task = next((t for t in tasks if t.type == TaskType.SUMMARIZE), None)
|
|
210
|
-
|
|
211
|
-
if mode == "async":
|
|
212
|
-
sub_results = await self._run_parallel(sub_tasks, top_k, budget, session_id)
|
|
213
|
-
else:
|
|
214
|
-
sub_results = await self._run_sequential(sub_tasks, top_k, budget, session_id)
|
|
215
|
-
|
|
216
|
-
# Merge all search results for context building
|
|
217
|
-
all_search_results: list[SearchResult] = []
|
|
218
|
-
kg_results: list[dict] = []
|
|
219
|
-
for sr in sub_results:
|
|
220
|
-
if sr.type == TaskType.SEARCH_WEB:
|
|
221
|
-
all_search_results.extend(sr.output)
|
|
222
|
-
elif sr.type == TaskType.SEARCH_KG:
|
|
223
|
-
kg_results.extend(sr.output)
|
|
224
|
-
|
|
225
|
-
# Dedup by chunk_id, keep highest rrf_score
|
|
226
|
-
merged: dict[str, SearchResult] = {}
|
|
227
|
-
for r in all_search_results:
|
|
228
|
-
if r.chunk_id not in merged or r.rrf_score > merged[r.chunk_id].rrf_score:
|
|
229
|
-
merged[r.chunk_id] = r
|
|
230
|
-
ranked = sorted(merged.values(), key=lambda r: r.rrf_score, reverse=True)[:top_k]
|
|
231
|
-
for i, r in enumerate(ranked, start=1):
|
|
232
|
-
r.rank = i
|
|
233
|
-
|
|
234
|
-
# Synthesize final answer
|
|
235
|
-
context, citations = self._context_builder.build(ranked, query, max_tokens=budget.remaining * 4)
|
|
236
|
-
if kg_results:
|
|
237
|
-
context += "\n\nKnowledge Graph facts:\n" + json.dumps(kg_results[:5], indent=2)
|
|
238
|
-
|
|
239
|
-
if self._llm is not None and ranked:
|
|
240
|
-
agent_resp = await self._llm.generate(query, context, citations, ranked)
|
|
241
|
-
answer = agent_resp.answer
|
|
242
|
-
elif ranked:
|
|
243
|
-
answer = "Based on retrieved sources: " + " ".join(
|
|
244
|
-
r.snippet[:150] for r in ranked[:2]
|
|
245
|
-
)
|
|
246
|
-
else:
|
|
247
|
-
answer = "No relevant information found across sub-agent searches."
|
|
248
|
-
|
|
249
|
-
# Fact verification pass
|
|
250
|
-
contradiction_flags: list[str] = []
|
|
251
|
-
if self._fact_verifier is not None and ranked:
|
|
252
|
-
verification = await self._fact_verifier.verify(answer, ranked)
|
|
253
|
-
contradiction_flags = [v["claim"] for v in verification if not v["verified"]]
|
|
254
|
-
|
|
255
|
-
await self._memory.set(session_id, "last_query", query)
|
|
256
|
-
await self._memory.set(session_id, "last_answer", answer)
|
|
257
|
-
|
|
258
|
-
elapsed = (time.perf_counter() - t0) * 1000
|
|
259
|
-
return {
|
|
260
|
-
"query": query,
|
|
261
|
-
"answer": answer,
|
|
262
|
-
"citations": [c.model_dump() for c in citations],
|
|
263
|
-
"sub_results": [
|
|
264
|
-
{"task_id": sr.task_id, "type": sr.type.value, "elapsed_ms": round(sr.elapsed_ms, 1)}
|
|
265
|
-
for sr in sub_results
|
|
266
|
-
],
|
|
267
|
-
"kg_facts": kg_results[:5],
|
|
268
|
-
"contradiction_flags": contradiction_flags,
|
|
269
|
-
"tokens_used": budget.used,
|
|
270
|
-
"session_id": session_id,
|
|
271
|
-
"mode": mode,
|
|
272
|
-
"latency_ms": round(elapsed, 1),
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
# ── Execution strategies ───────────────────────────────────────────────────
|
|
276
|
-
|
|
277
|
-
async def _run_parallel(
|
|
278
|
-
self, tasks: list[SubTask], top_k: int, budget: TokenBudget, session_id: str
|
|
279
|
-
) -> list[SubTaskResult]:
|
|
280
|
-
coros = [self._execute_task(t, top_k, budget, session_id) for t in tasks]
|
|
281
|
-
return await asyncio.gather(*coros)
|
|
282
|
-
|
|
283
|
-
async def _run_sequential(
|
|
284
|
-
self, tasks: list[SubTask], top_k: int, budget: TokenBudget, session_id: str
|
|
285
|
-
) -> list[SubTaskResult]:
|
|
286
|
-
results = []
|
|
287
|
-
for t in tasks:
|
|
288
|
-
results.append(await self._execute_task(t, top_k, budget, session_id))
|
|
289
|
-
return results
|
|
290
|
-
|
|
291
|
-
async def _execute_task(
|
|
292
|
-
self, task: SubTask, top_k: int, budget: TokenBudget, session_id: str
|
|
293
|
-
) -> SubTaskResult:
|
|
294
|
-
t0 = time.perf_counter()
|
|
295
|
-
|
|
296
|
-
if task.type == TaskType.SEARCH_WEB:
|
|
297
|
-
results = await self._retriever.search(task.query, top_k=top_k)
|
|
298
|
-
output: Any = results
|
|
299
|
-
|
|
300
|
-
elif task.type == TaskType.SEARCH_KG:
|
|
301
|
-
if self._kg is not None:
|
|
302
|
-
output = self._kg.search_entities(task.query, limit=5)
|
|
303
|
-
else:
|
|
304
|
-
output = []
|
|
305
|
-
|
|
306
|
-
elif task.type == TaskType.VERIFY_FACT:
|
|
307
|
-
output = [] # handled post-hoc by FactVerifier
|
|
308
|
-
|
|
309
|
-
else: # SUMMARIZE — handled by caller
|
|
310
|
-
output = None
|
|
311
|
-
|
|
312
|
-
elapsed_ms = (time.perf_counter() - t0) * 1000
|
|
313
|
-
await self._memory.set(session_id, f"task:{task.task_id}", {"type": task.type.value, "elapsed_ms": elapsed_ms})
|
|
314
|
-
|
|
315
|
-
return SubTaskResult(task_id=task.task_id, type=task.type, output=output, elapsed_ms=elapsed_ms)
|