@drico2008/fincli 0.1.9 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +909 -718
- package/fincli/__init__.py +3 -3
- package/fincli/app/agents/__init__.py +5 -0
- package/fincli/app/agents/registry.py +76 -0
- package/fincli/app/analysis/ai_prompts.py +23 -16
- package/fincli/app/analysis/analyzer.py +107 -100
- package/fincli/app/analysis/assistant_context.py +187 -186
- package/fincli/app/analysis/backtest.py +179 -0
- package/fincli/app/analysis/gameplay_plan.py +79 -0
- package/fincli/app/analysis/multi_timeframe.py +180 -0
- package/fincli/app/analysis/trading_methods.py +144 -0
- package/fincli/app/cli/commands.py +105 -83
- package/fincli/app/cli/router.py +2123 -1294
- package/fincli/app/connectors/__init__.py +5 -0
- package/fincli/app/connectors/catalog.py +148 -0
- package/fincli/app/connectors/news_connectors.py +412 -0
- package/fincli/app/modules/alerts.py +80 -0
- package/fincli/app/modules/economic_calendar.py +374 -1
- package/fincli/app/modules/reports.py +151 -0
- package/fincli/app/modules/scanner.py +111 -93
- package/fincli/app/modules/transactions.py +84 -84
- package/fincli/app/modules/user_profile.py +84 -0
- package/fincli/app/plugins/loader.py +72 -0
- package/fincli/app/providers/ai/manager.py +60 -60
- package/fincli/app/providers/market/alphavantage_provider.py +194 -0
- package/fincli/app/providers/market/base.py +98 -77
- package/fincli/app/providers/market/custom_provider.py +186 -169
- package/fincli/app/providers/market/manager.py +84 -1
- package/fincli/app/providers/market/symbols.py +143 -0
- package/fincli/app/providers/market/twelvedata_provider.py +167 -167
- package/fincli/app/research/__init__.py +7 -0
- package/fincli/app/research/engine.py +75 -0
- package/fincli/app/research/formatter.py +22 -0
- package/fincli/app/research/models.py +18 -0
- package/fincli/app/research/prompt_builder.py +47 -0
- package/fincli/app/services/macro_data.py +50 -0
- package/fincli/app/services/market_data.py +203 -203
- package/fincli/app/services/news_aggregator.py +90 -0
- package/fincli/app/services/web_research.py +267 -267
- package/fincli/app/storage/config.py +122 -88
- package/fincli/app/storage/database.py +200 -101
- package/fincli/app/storage/secrets.py +8 -2
- package/fincli/app/tui/components.py +68 -50
- package/fincli/app/tui/layout.py +269 -258
- package/fincli/app/tui/market_provider_selector.py +3 -1
- package/fincli/app/tui/theme.py +134 -74
- package/fincli/app/utils/formatting.py +123 -60
- package/package.json +23 -23
- package/pyproject.toml +35 -35
|
@@ -1,267 +1,267 @@
|
|
|
1
|
-
"""Lightweight web research service for AI assistance."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from html import unescape
|
|
7
|
-
from html.parser import HTMLParser
|
|
8
|
-
import re
|
|
9
|
-
from urllib.parse import parse_qs, quote_plus, unquote, urlparse
|
|
10
|
-
from xml.etree import ElementTree
|
|
11
|
-
|
|
12
|
-
import httpx
|
|
13
|
-
|
|
14
|
-
from fincli.app.utils.errors import ProviderError
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@dataclass(frozen=True, slots=True)
|
|
18
|
-
class WebSearchResult:
|
|
19
|
-
title: str
|
|
20
|
-
url: str
|
|
21
|
-
snippet: str = ""
|
|
22
|
-
content: str = ""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class WebResearchService:
    """Search and fetch public web pages without browser automation.

    Searches try DuckDuckGo's HTML endpoint first and fall back to the
    Google News RSS feed. Result pages can then be downloaded and reduced to
    plain text.
    """

    def __init__(self, client: httpx.AsyncClient | None = None, timeout_seconds: float = 6.0) -> None:
        """Store the optional shared client and the request timeout.

        Args:
            client: Caller-owned ``httpx.AsyncClient`` reused for every
                request and never closed by this service. When ``None``, a
                temporary client is created and closed for each request.
            timeout_seconds: Timeout handed to temporary clients.
        """
        self._client = client
        self.timeout_seconds = timeout_seconds

    async def research(self, query: str, limit: int = 3) -> list[WebSearchResult]:
        """Search for *query* and attach a text extract to each hit.

        Args:
            query: Free-text search query; surrounding whitespace is ignored.
            limit: Maximum number of hits to return.

        Returns:
            Up to *limit* results whose ``content`` holds the extracted page
            text (empty when the page could not be fetched). A blank query
            or a non-positive *limit* yields an empty list.

        Raises:
            ProviderError: If every search provider failed or came back empty.
        """
        import asyncio  # local import keeps this block free of new file-level deps

        normalized = query.strip()
        if not normalized:
            return []
        hits = (await self.search(normalized, limit=limit))[:limit]
        if not hits:
            return []
        # The page downloads are independent, so run them concurrently;
        # gather() returns the extracts in the same order as ``hits``.
        extracts = await asyncio.gather(*(self.fetch_text(hit.url) for hit in hits))
        return [
            WebSearchResult(title=hit.title, url=hit.url, snippet=hit.snippet, content=extract)
            for hit, extract in zip(hits, extracts)
        ]

    async def search(self, query: str, limit: int = 5) -> list[WebSearchResult]:
        """Return hits from the first provider that yields any.

        Args:
            query: Search query passed to each provider.
            limit: Maximum number of hits to return.

        Returns:
            The first non-empty provider result, or an empty list when
            *limit* is not positive.

        Raises:
            ProviderError: If all providers failed or returned nothing.
        """
        if limit <= 0:
            # Nothing was asked for: skip the network round-trips entirely
            # instead of returning a stray hit or raising.
            return []
        errors: list[str] = []
        for searcher in (self._search_duckduckgo, self._search_google_news):
            try:
                results = await searcher(query, limit)
            except ProviderError as exc:
                # Remember the failure and fall through to the next provider.
                errors.append(str(exc))
                continue
            if results:
                return results
        detail = "\n".join(f"- {error}" for error in errors) if errors else "Tidak ada hasil publik."
        raise ProviderError(
            "Semua web search provider gagal atau kosong.",
            f"{detail}\nCoba ulangi, sederhanakan query, atau cek koneksi/DNS.",
        )

    async def _search_duckduckgo(self, query: str, limit: int) -> list[WebSearchResult]:
        """Query DuckDuckGo's HTML endpoint and return up to *limit* unique hits."""
        html = await self._get_text(f"https://duckduckgo.com/html/?q={quote_plus(query)}")
        parser = _DuckDuckGoParser()
        parser.feed(html)
        results: list[WebSearchResult] = []
        seen: set[str] = set()
        for item in parser.results:
            # Result links may be DuckDuckGo redirect URLs; unwrap them first.
            target = _clean_duckduckgo_url(item.url)
            if not target or target in seen:
                continue
            seen.add(target)
            results.append(WebSearchResult(title=_clean_text(item.title), url=target, snippet=_clean_text(item.snippet)))
            if len(results) >= limit:
                break
        return results

    async def _search_google_news(self, query: str, limit: int) -> list[WebSearchResult]:
        """Query the Google News RSS search feed and return up to *limit* unique hits.

        Raises:
            ProviderError: If the download fails or the feed is not valid XML.
        """
        rss = await self._get_text(f"https://news.google.com/rss/search?q={quote_plus(query)}&hl=id&gl=ID&ceid=ID:id")
        try:
            # NOTE(review): remote XML is parsed with the stdlib parser;
            # confirm that is acceptable for untrusted feeds.
            root = ElementTree.fromstring(rss)
        except ElementTree.ParseError as exc:
            raise ProviderError("Google News RSS tidak valid.") from exc

        results: list[WebSearchResult] = []
        seen: set[str] = set()
        for item in root.findall(".//item"):
            title = _clean_text(item.findtext("title") or "")
            url = _clean_text(item.findtext("link") or "")
            # Descriptions are HTML fragments, so strip the markup.
            snippet = _clean_text(_html_to_text(item.findtext("description") or ""))
            if not title or not url or url in seen:
                continue
            seen.add(url)
            results.append(WebSearchResult(title=title, url=url, snippet=snippet))
            if len(results) >= limit:
                break
        return results

    async def fetch_text(self, url: str, max_chars: int = 2400) -> str:
        """Download *url* and return at most *max_chars* of its visible text.

        This is best-effort: a URL that is not http(s), or any download
        failure reported as ``ProviderError``, yields an empty string.
        """
        if not url.startswith(("http://", "https://")):
            return ""
        try:
            html = await self._get_text(url)
        except ProviderError:
            return ""
        text = _html_to_text(html)
        return text[:max_chars]

    async def _get_text(self, url: str) -> str:
        """GET *url* and return the response body as text.

        Raises:
            ProviderError: On timeout, an HTTP error status, or a connection
                level failure.
        """
        headers = {
            "User-Agent": "FinCLI/0.1 web research (+https://www.npmjs.com/package/@drico2008/fincli)",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,text/plain;q=0.8,*/*;q=0.7",
        }
        # Only close clients created here; an injected client belongs to the caller.
        close_client = self._client is None
        client = self._client or httpx.AsyncClient(timeout=self.timeout_seconds, follow_redirects=True, headers=headers)
        try:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            return response.text
        except httpx.TimeoutException as exc:
            raise ProviderError("Web research timeout.", f"URL: {url}") from exc
        except httpx.HTTPStatusError as exc:
            raise ProviderError(f"Web research gagal: HTTP {exc.response.status_code}.", f"URL: {url}") from exc
        except httpx.RequestError as exc:
            raise ProviderError(f"Web research gagal terhubung: {exc}.", f"URL: {url}") from exc
        finally:
            if close_client:
                await client.aclose()
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def should_use_web_research(prompt: str) -> bool:
    """Tell whether *prompt* mentions a topic that calls for fresh web context.

    The prompt is lower-cased and scanned for a fixed list of trigger
    phrases using plain substring matching.
    """
    triggers = (
        "terkini",
        "terbaru",
        "hari ini",
        "sekarang",
        "saat ini",
        "update",
        "berita",
        "news",
        "web",
        "search",
        "cari",
        "penyebab",
        "mengapa",
        "kenapa",
        "rupiah",
        "inflasi",
        "suku bunga",
        "bank indonesia",
        "fed",
        "dollar",
        "dolar",
        "yield",
        "minyak",
        "emas",
    )
    lowered = prompt.lower()
    for trigger in triggers:
        if trigger in lowered:
            return True
    return False
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def build_web_research_context(results: list[WebSearchResult]) -> str:
    """Render search results as a newline-separated text block.

    Each result contributes four lines: a numbered title, its URL, its
    snippet and its page extract, with ``N/A`` standing in for an empty
    snippet or extract. An empty *results* list yields a fixed
    "no context" sentence instead.
    """
    if not results:
        return "Web Research: no public web context returned."
    lines = ["Web Research Context:"]
    for position, item in enumerate(results, start=1):
        snippet = item.snippet or "N/A"
        extract = item.content or "N/A"
        lines.append(f"{position}. {item.title}")
        lines.append(f"URL: {item.url}")
        lines.append(f"Snippet: {snippet}")
        lines.append(f"Extract: {extract}")
    return "\n".join(lines)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
class _DuckResult:
    """Mutable scratch record for one DuckDuckGo hit while it is being parsed."""

    def __init__(self) -> None:
        # Every field starts out empty and is filled in as the parser goes.
        self.title = self.url = self.snippet = ""
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
class _DuckDuckGoParser(HTMLParser):
    """Collect result links from a DuckDuckGo HTML results page.

    An ``<a>`` whose class contains ``result__a`` starts a new hit and
    supplies its URL and title. A following ``<a>``/``<div>`` whose class
    contains ``result__snippet`` supplies the snippet for that hit. Parsed
    hits accumulate in ``results``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.results: list[_DuckResult] = []
        # Hit currently being assembled, if any.
        self._current: _DuckResult | None = None
        # Which field ("title" or "snippet") incoming text belongs to.
        self._capture: str | None = None
        self._buffer: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Begin capturing a title or snippet when a matching tag opens."""
        attr = dict(attrs)
        # A valueless attribute (``<a class>``) is reported as None, which
        # would make the substring checks below raise TypeError.
        classes = attr.get("class") or ""
        if tag == "a" and "result__a" in classes:
            self._current = _DuckResult()
            self._current.url = attr.get("href") or ""
            self._capture = "title"
            self._buffer = []
        elif self._current is not None and tag in {"a", "div"} and "result__snippet" in classes:
            self._capture = "snippet"
            self._buffer = []

    def handle_data(self, data: str) -> None:
        """Buffer text while a title or snippet is being captured."""
        if self._capture:
            self._buffer.append(data)

    def handle_endtag(self, tag: str) -> None:
        """Finish the field being captured when its element closes."""
        if self._current is None or self._capture is None:
            return
        if self._capture == "title" and tag == "a":
            self._current.title = _clean_text(" ".join(self._buffer))
            # Record the hit as soon as its title is known so that results
            # without a snippet are kept; a later snippet is written onto
            # this same object.
            self.results.append(self._current)
            self._capture = None
            self._buffer = []
        elif self._capture == "snippet" and tag in {"a", "div"}:
            self._current.snippet = _clean_text(" ".join(self._buffer))
            self._current = None
            self._capture = None
            self._buffer = []
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
class _TextExtractor(HTMLParser):
    """Accumulate the text of an HTML document, ignoring non-content elements."""

    # Elements whose text content is never collected.
    _SKIPPED_TAGS = frozenset({"script", "style", "noscript", "svg"})

    def __init__(self) -> None:
        super().__init__()
        self.parts: list[str] = []
        # Number of currently open skipped elements.
        self._skip_depth = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Enter skip mode when a skipped element opens."""
        if tag not in self._SKIPPED_TAGS:
            return
        self._skip_depth += 1

    def handle_endtag(self, tag: str) -> None:
        """Leave one level of skip mode when a skipped element closes."""
        # Guard against stray closing tags driving the counter negative.
        if self._skip_depth and tag in self._SKIPPED_TAGS:
            self._skip_depth -= 1

    def handle_data(self, data: str) -> None:
        """Keep non-empty, whitespace-normalized text outside skipped elements."""
        if self._skip_depth:
            return
        cleaned = _clean_text(data)
        if cleaned:
            self.parts.append(cleaned)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def _html_to_text(html: str) -> str:
    """Return the text of *html* as one whitespace-normalized string.

    Text inside the elements that ``_TextExtractor`` skips is left out.
    """
    extractor = _TextExtractor()
    extractor.feed(html)
    # feed() may hold back trailing text while it waits for more input (for
    # example a fragment ending in "AT&T" with no tag after it); close()
    # forces that buffered text through handle_data.
    extractor.close()
    return _clean_text(" ".join(extractor.parts))
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def _clean_text(value: str) -> str:
    """Decode HTML entities in *value* and squeeze whitespace runs to single spaces.

    Leading and trailing whitespace is removed from the result.
    """
    decoded = unescape(value)
    return re.sub(r"\s+", " ", decoded).strip()
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def _clean_duckduckgo_url(url: str) -> str:
    """Unwrap a DuckDuckGo redirect link into the URL it points to.

    Links whose host ends with ``duckduckgo.com`` and whose path starts with
    ``/l/`` carry the real destination in their ``uddg`` query parameter.
    That destination is returned, or an empty string when the parameter is
    missing. Any other URL is returned unchanged.
    """
    parsed = urlparse(url)
    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.startswith("/l/"):
        # parse_qs already percent-decodes values; decoding a second time
        # would corrupt escapes that belong to the target URL itself
        # (e.g. "%2520" must come out as "%20", not as a space).
        return parse_qs(parsed.query).get("uddg", [""])[0]
    return url
|
|
1
|
+
"""Lightweight web research service for AI assistance."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from html import unescape
|
|
7
|
+
from html.parser import HTMLParser
|
|
8
|
+
import re
|
|
9
|
+
from urllib.parse import parse_qs, quote_plus, unquote, urlparse
|
|
10
|
+
from xml.etree import ElementTree
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
from fincli.app.utils.errors import ProviderError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True, slots=True)
|
|
18
|
+
class WebSearchResult:
|
|
19
|
+
title: str
|
|
20
|
+
url: str
|
|
21
|
+
snippet: str = ""
|
|
22
|
+
content: str = ""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class WebResearchService:
    """Search and fetch public web pages without browser automation.

    Searches try DuckDuckGo's HTML endpoint first and fall back to the
    Google News RSS feed. Result pages can then be downloaded and reduced to
    plain text.
    """

    def __init__(self, client: httpx.AsyncClient | None = None, timeout_seconds: float = 6.0) -> None:
        """Store the optional shared client and the request timeout.

        Args:
            client: Caller-owned ``httpx.AsyncClient`` reused for every
                request and never closed by this service. When ``None``, a
                temporary client is created and closed for each request.
            timeout_seconds: Timeout handed to temporary clients.
        """
        self._client = client
        self.timeout_seconds = timeout_seconds

    async def research(self, query: str, limit: int = 3) -> list[WebSearchResult]:
        """Search for *query* and attach a text extract to each hit.

        Args:
            query: Free-text search query; surrounding whitespace is ignored.
            limit: Maximum number of hits to return.

        Returns:
            Up to *limit* results whose ``content`` holds the extracted page
            text (empty when the page could not be fetched). A blank query
            or a non-positive *limit* yields an empty list.

        Raises:
            ProviderError: If every search provider failed or came back empty.
        """
        import asyncio  # local import keeps this block free of new file-level deps

        normalized = query.strip()
        if not normalized:
            return []
        hits = (await self.search(normalized, limit=limit))[:limit]
        if not hits:
            return []
        # The page downloads are independent, so run them concurrently;
        # gather() returns the extracts in the same order as ``hits``.
        extracts = await asyncio.gather(*(self.fetch_text(hit.url) for hit in hits))
        return [
            WebSearchResult(title=hit.title, url=hit.url, snippet=hit.snippet, content=extract)
            for hit, extract in zip(hits, extracts)
        ]

    async def search(self, query: str, limit: int = 5) -> list[WebSearchResult]:
        """Return hits from the first provider that yields any.

        Args:
            query: Search query passed to each provider.
            limit: Maximum number of hits to return.

        Returns:
            The first non-empty provider result, or an empty list when
            *limit* is not positive.

        Raises:
            ProviderError: If all providers failed or returned nothing.
        """
        if limit <= 0:
            # Nothing was asked for: skip the network round-trips entirely
            # instead of returning a stray hit or raising.
            return []
        errors: list[str] = []
        for searcher in (self._search_duckduckgo, self._search_google_news):
            try:
                results = await searcher(query, limit)
            except ProviderError as exc:
                # Remember the failure and fall through to the next provider.
                errors.append(str(exc))
                continue
            if results:
                return results
        detail = "\n".join(f"- {error}" for error in errors) if errors else "Tidak ada hasil publik."
        raise ProviderError(
            "Semua web search provider gagal atau kosong.",
            f"{detail}\nCoba ulangi, sederhanakan query, atau cek koneksi/DNS.",
        )

    async def _search_duckduckgo(self, query: str, limit: int) -> list[WebSearchResult]:
        """Query DuckDuckGo's HTML endpoint and return up to *limit* unique hits."""
        html = await self._get_text(f"https://duckduckgo.com/html/?q={quote_plus(query)}")
        parser = _DuckDuckGoParser()
        parser.feed(html)
        results: list[WebSearchResult] = []
        seen: set[str] = set()
        for item in parser.results:
            # Result links may be DuckDuckGo redirect URLs; unwrap them first.
            target = _clean_duckduckgo_url(item.url)
            if not target or target in seen:
                continue
            seen.add(target)
            results.append(WebSearchResult(title=_clean_text(item.title), url=target, snippet=_clean_text(item.snippet)))
            if len(results) >= limit:
                break
        return results

    async def _search_google_news(self, query: str, limit: int) -> list[WebSearchResult]:
        """Query the Google News RSS search feed and return up to *limit* unique hits.

        Raises:
            ProviderError: If the download fails or the feed is not valid XML.
        """
        rss = await self._get_text(f"https://news.google.com/rss/search?q={quote_plus(query)}&hl=id&gl=ID&ceid=ID:id")
        try:
            # NOTE(review): remote XML is parsed with the stdlib parser;
            # confirm that is acceptable for untrusted feeds.
            root = ElementTree.fromstring(rss)
        except ElementTree.ParseError as exc:
            raise ProviderError("Google News RSS tidak valid.") from exc

        results: list[WebSearchResult] = []
        seen: set[str] = set()
        for item in root.findall(".//item"):
            title = _clean_text(item.findtext("title") or "")
            url = _clean_text(item.findtext("link") or "")
            # Descriptions are HTML fragments, so strip the markup.
            snippet = _clean_text(_html_to_text(item.findtext("description") or ""))
            if not title or not url or url in seen:
                continue
            seen.add(url)
            results.append(WebSearchResult(title=title, url=url, snippet=snippet))
            if len(results) >= limit:
                break
        return results

    async def fetch_text(self, url: str, max_chars: int = 2400) -> str:
        """Download *url* and return at most *max_chars* of its visible text.

        This is best-effort: a URL that is not http(s), or any download
        failure reported as ``ProviderError``, yields an empty string.
        """
        if not url.startswith(("http://", "https://")):
            return ""
        try:
            html = await self._get_text(url)
        except ProviderError:
            return ""
        text = _html_to_text(html)
        return text[:max_chars]

    async def _get_text(self, url: str) -> str:
        """GET *url* and return the response body as text.

        Raises:
            ProviderError: On timeout, an HTTP error status, or a connection
                level failure.
        """
        headers = {
            "User-Agent": "FinCLI/0.1 web research (+https://www.npmjs.com/package/@drico2008/fincli)",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,text/plain;q=0.8,*/*;q=0.7",
        }
        # Only close clients created here; an injected client belongs to the caller.
        close_client = self._client is None
        client = self._client or httpx.AsyncClient(timeout=self.timeout_seconds, follow_redirects=True, headers=headers)
        try:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            return response.text
        except httpx.TimeoutException as exc:
            raise ProviderError("Web research timeout.", f"URL: {url}") from exc
        except httpx.HTTPStatusError as exc:
            raise ProviderError(f"Web research gagal: HTTP {exc.response.status_code}.", f"URL: {url}") from exc
        except httpx.RequestError as exc:
            raise ProviderError(f"Web research gagal terhubung: {exc}.", f"URL: {url}") from exc
        finally:
            if close_client:
                await client.aclose()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def should_use_web_research(prompt: str) -> bool:
    """Tell whether *prompt* mentions a topic that calls for fresh web context.

    The prompt is lower-cased and scanned for a fixed list of trigger
    phrases using plain substring matching.
    """
    triggers = (
        "terkini",
        "terbaru",
        "hari ini",
        "sekarang",
        "saat ini",
        "update",
        "berita",
        "news",
        "web",
        "search",
        "cari",
        "penyebab",
        "mengapa",
        "kenapa",
        "rupiah",
        "inflasi",
        "suku bunga",
        "bank indonesia",
        "fed",
        "dollar",
        "dolar",
        "yield",
        "minyak",
        "emas",
    )
    lowered = prompt.lower()
    for trigger in triggers:
        if trigger in lowered:
            return True
    return False
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def build_web_research_context(results: list[WebSearchResult]) -> str:
    """Render search results as a newline-separated text block.

    Each result contributes four lines: a numbered title, its URL, its
    snippet and its page extract, with ``N/A`` standing in for an empty
    snippet or extract. An empty *results* list yields a fixed
    "no context" sentence instead.
    """
    if not results:
        return "Web Research: no public web context returned."
    lines = ["Web Research Context:"]
    for position, item in enumerate(results, start=1):
        snippet = item.snippet or "N/A"
        extract = item.content or "N/A"
        lines.append(f"{position}. {item.title}")
        lines.append(f"URL: {item.url}")
        lines.append(f"Snippet: {snippet}")
        lines.append(f"Extract: {extract}")
    return "\n".join(lines)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class _DuckResult:
    """Mutable scratch record for one DuckDuckGo hit while it is being parsed."""

    def __init__(self) -> None:
        # Every field starts out empty and is filled in as the parser goes.
        self.title = self.url = self.snippet = ""
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class _DuckDuckGoParser(HTMLParser):
    """Collect result links from a DuckDuckGo HTML results page.

    An ``<a>`` whose class contains ``result__a`` starts a new hit and
    supplies its URL and title. A following ``<a>``/``<div>`` whose class
    contains ``result__snippet`` supplies the snippet for that hit. Parsed
    hits accumulate in ``results``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.results: list[_DuckResult] = []
        # Hit currently being assembled, if any.
        self._current: _DuckResult | None = None
        # Which field ("title" or "snippet") incoming text belongs to.
        self._capture: str | None = None
        self._buffer: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Begin capturing a title or snippet when a matching tag opens."""
        attr = dict(attrs)
        # A valueless attribute (``<a class>``) is reported as None, which
        # would make the substring checks below raise TypeError.
        classes = attr.get("class") or ""
        if tag == "a" and "result__a" in classes:
            self._current = _DuckResult()
            self._current.url = attr.get("href") or ""
            self._capture = "title"
            self._buffer = []
        elif self._current is not None and tag in {"a", "div"} and "result__snippet" in classes:
            self._capture = "snippet"
            self._buffer = []

    def handle_data(self, data: str) -> None:
        """Buffer text while a title or snippet is being captured."""
        if self._capture:
            self._buffer.append(data)

    def handle_endtag(self, tag: str) -> None:
        """Finish the field being captured when its element closes."""
        if self._current is None or self._capture is None:
            return
        if self._capture == "title" and tag == "a":
            self._current.title = _clean_text(" ".join(self._buffer))
            # Record the hit as soon as its title is known so that results
            # without a snippet are kept; a later snippet is written onto
            # this same object.
            self.results.append(self._current)
            self._capture = None
            self._buffer = []
        elif self._capture == "snippet" and tag in {"a", "div"}:
            self._current.snippet = _clean_text(" ".join(self._buffer))
            self._current = None
            self._capture = None
            self._buffer = []
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class _TextExtractor(HTMLParser):
    """Accumulate the text of an HTML document, ignoring non-content elements."""

    # Elements whose text content is never collected.
    _SKIPPED_TAGS = frozenset({"script", "style", "noscript", "svg"})

    def __init__(self) -> None:
        super().__init__()
        self.parts: list[str] = []
        # Number of currently open skipped elements.
        self._skip_depth = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Enter skip mode when a skipped element opens."""
        if tag not in self._SKIPPED_TAGS:
            return
        self._skip_depth += 1

    def handle_endtag(self, tag: str) -> None:
        """Leave one level of skip mode when a skipped element closes."""
        # Guard against stray closing tags driving the counter negative.
        if self._skip_depth and tag in self._SKIPPED_TAGS:
            self._skip_depth -= 1

    def handle_data(self, data: str) -> None:
        """Keep non-empty, whitespace-normalized text outside skipped elements."""
        if self._skip_depth:
            return
        cleaned = _clean_text(data)
        if cleaned:
            self.parts.append(cleaned)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _html_to_text(html: str) -> str:
    """Return the text of *html* as one whitespace-normalized string.

    Text inside the elements that ``_TextExtractor`` skips is left out.
    """
    extractor = _TextExtractor()
    extractor.feed(html)
    # feed() may hold back trailing text while it waits for more input (for
    # example a fragment ending in "AT&T" with no tag after it); close()
    # forces that buffered text through handle_data.
    extractor.close()
    return _clean_text(" ".join(extractor.parts))
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _clean_text(value: str) -> str:
    """Decode HTML entities in *value* and squeeze whitespace runs to single spaces.

    Leading and trailing whitespace is removed from the result.
    """
    decoded = unescape(value)
    return re.sub(r"\s+", " ", decoded).strip()
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _clean_duckduckgo_url(url: str) -> str:
    """Unwrap a DuckDuckGo redirect link into the URL it points to.

    Links whose host ends with ``duckduckgo.com`` and whose path starts with
    ``/l/`` carry the real destination in their ``uddg`` query parameter.
    That destination is returned, or an empty string when the parameter is
    missing. Any other URL is returned unchanged.
    """
    parsed = urlparse(url)
    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.startswith("/l/"):
        # parse_qs already percent-decodes values; decoding a second time
        # would corrupt escapes that belong to the target URL itself
        # (e.g. "%2520" must come out as "%20", not as a space).
        return parse_qs(parsed.query).get("uddg", [""])[0]
    return url
|