webctx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +84 -0
- package/CONTRIBUTORS.md +93 -0
- package/LICENSE +21 -0
- package/Makefile +69 -0
- package/README.md +95 -0
- package/bin/webctx.js +28 -0
- package/cmd/webctx/main.go +11 -0
- package/docs/porting-status.md +173 -0
- package/go.mod +3 -0
- package/internal/app/app.go +139 -0
- package/internal/app/app_test.go +77 -0
- package/internal/app/scrape.go +310 -0
- package/internal/app/tools.go +558 -0
- package/internal/buildinfo/buildinfo.go +16 -0
- package/package.json +55 -0
- package/scripts/postinstall.js +137 -0
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
package app
|
|
2
|
+
|
|
3
|
+
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"sync"
	"time"
)
|
|
17
|
+
|
|
18
|
+
// SearchParams are the inputs to Search.
type SearchParams struct {
	// Query is the free-text search query.
	Query string
	// ExcludeDomains are filtered out of results in addition to the
	// built-in default exclusions (video/social sites).
	ExcludeDomains []string
	// IncludeKeyword, when non-empty, switches Search to the Exa-only
	// path with an includeText filter (truncated to 5 words).
	IncludeKeyword string
}

// SearchDoc is a single search hit from any provider.
type SearchDoc struct {
	URL   string
	Title string
	// Overview is snippet/summary text; may be empty.
	Overview string
}

// SearchResult is one provider's list of hits.
type SearchResult struct {
	Docs []SearchDoc
}

// providerDocs pairs a provider name with the docs it returned; used
// when merging and weighting results across providers.
type providerDocs struct {
	Docs     []SearchDoc
	Provider string
}

// scoredURL accumulates ranking state for one de-duplicated URL
// (see scoreAndRankResults).
type scoredURL struct {
	Result         SearchDoc // copy of the doc from the provider that ranked it best
	TotalScore     float64   // sum of weighted position points across providers
	DuplicateCount int       // how many provider lists contained this URL
	BestPosition   int       // best (lowest) 1-based rank observed
	FinalScore     float64   // TotalScore plus duplicate bonus/penalty
}
|
|
46
|
+
|
|
47
|
+
// Search fans the query out to the configured search providers, merges
// the de-duplicated results by weighted rank, and renders them as a
// markdown list. When IncludeKeyword is set, only Exa is queried (it is
// the only provider call here that takes a keyword argument); otherwise
// Brave, Tavily, and Exa run concurrently. Individual provider failures
// are tolerated; an error is returned only when every provider fails.
func Search(params SearchParams) (string, error) {
	// Video/social domains are excluded by default; callers may add more.
	defaultExcludedDomains := []string{"youtube.com", "vimeo.com", "dailymotion.com", "twitch.tv", "tiktok.com", "instagram.com", "facebook.com"}
	allExcludedDomains := append(append([]string{}, defaultExcludedDomains...), params.ExcludeDomains...)
	// Keep the keyword short; presumably a provider-side limit — see truncateWords.
	truncatedKeyword := truncateWords(params.IncludeKeyword, 5)

	type namedSearch struct {
		name string
		fn   func(context.Context) (SearchResult, error)
	}

	searches := []namedSearch{}
	if strings.TrimSpace(truncatedKeyword) != "" {
		searches = append(searches, namedSearch{name: "Exa", fn: func(ctx context.Context) (SearchResult, error) {
			return searchWithExa(ctx, params.Query, nil, truncatedKeyword)
		}})
	} else {
		searches = append(searches,
			namedSearch{name: "Brave", fn: func(ctx context.Context) (SearchResult, error) { return searchWithBrave(ctx, params.Query) }},
			namedSearch{name: "Tavily", fn: func(ctx context.Context) (SearchResult, error) { return searchWithTavily(ctx, params.Query, nil) }},
			namedSearch{name: "Exa", fn: func(ctx context.Context) (SearchResult, error) { return searchWithExa(ctx, params.Query, nil, "") }},
		)
	}

	// Each goroutine writes only its own slice index, so no mutex is needed.
	results := make([]providerDocs, len(searches))
	var wg sync.WaitGroup
	for i, s := range searches {
		wg.Add(1)
		go func(i int, s namedSearch) {
			defer wg.Done()
			// Per-provider deadline; a slow provider cannot stall the others.
			ctx, cancel := context.WithTimeout(context.Background(), 40*time.Second)
			defer cancel()
			res, err := s.fn(ctx)
			if err != nil {
				// Provider errors are deliberately swallowed: an empty doc
				// list contributes nothing to the merged ranking below.
				results[i] = providerDocs{Provider: s.name, Docs: []SearchDoc{}}
				return
			}
			results[i] = providerDocs{Provider: s.name, Docs: res.Docs}
		}(i, s)
	}
	wg.Wait()

	total := 0
	for _, r := range results {
		total += len(r.Docs)
	}
	if total == 0 {
		return "", errors.New("Error searching the web: All search providers failed to return results")
	}

	filtered := filterExcludedDomains(results, allExcludedDomains)
	ranked, _ := scoreAndRankResults(filtered)
	// Cap the rendered list at 35 entries.
	if len(ranked) > 35 {
		ranked = ranked[:35]
	}

	parts := []string{fmt.Sprintf("Total Results: %d\n", len(ranked))}
	for _, doc := range ranked {
		// Provider snippets can contain HTML entities; decode for display.
		title := decodeHTML(doc.Title)
		overview := decodeHTML(doc.Overview)
		parts = append(parts, fmt.Sprintf("- [%s](%s)", title, doc.URL))
		if overview != "" {
			parts = append(parts, fmt.Sprintf(" - %s", overview))
		}
		parts = append(parts, "")
	}
	return strings.Join(parts, "\n"), nil
}
|
|
114
|
+
|
|
115
|
+
// ReadLink fetches a web page and returns it formatted as markdown.
// It tries cheap strategies first — GitHub raw content, then a
// pre-rendered markdown variant — and falls back to the Firecrawl
// scrape API, which requires FIRECRAWL_API_KEY.
func ReadLink(rawURL string) (string, error) {
	// Best-effort fast path: the error is deliberately discarded and we
	// fall through to the next strategy on any failure.
	if result, _ := fetchGitHubRawContent(rawURL); result != nil {
		return formatReadLink(result.Title, result.URL, result.Markdown), nil
	}

	// Second fast path: a markdown rendition served next to the page.
	if ok, _ := checkMarkdownAvailable(rawURL); ok {
		result, err := fetchMarkdownContent(rawURL)
		if err == nil {
			return formatReadLink(result.Title, result.URL, result.Markdown), nil
		}
	}

	apiKey := strings.TrimSpace(os.Getenv("FIRECRAWL_API_KEY"))
	if apiKey == "" {
		return "", errors.New("Error reading web page: FIRECRAWL_API_KEY environment variable is required for non-.md URLs")
	}

	requestBody := map[string]any{
		"url":                 rawURL,
		"formats":             []string{"markdown"},
		"onlyMainContent":     true,
		"skipTlsVerification": true,
		"blockAds":            true,
		"removeBase64Images":  true,
		// maxAge allows Firecrawl to serve a cached copy; presumably
		// milliseconds (10 minutes) — confirm against Firecrawl docs.
		"maxAge":      600000,
		"excludeTags": []string{"script", "style", "meta", "noscript", "svg", "img", "nav", "footer", "header", "aside", ".advertisement", "#ad"},
	}
	if strings.HasSuffix(strings.ToLower(rawURL), ".pdf") {
		requestBody["parsers"] = []string{"pdf"}
	}

	// Scrapes are serialized through a queue (see getFirecrawlQueue);
	// presumably to respect Firecrawl rate limits.
	data, err := getFirecrawlQueue().enqueue(rawURL, func() (map[string]any, error) {
		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
		defer cancel()
		body, err := doJSONRequest(ctx, http.MethodPost, "https://api.firecrawl.dev/v2/scrape", map[string]string{
			"Authorization": "Bearer " + apiKey,
			"Content-Type":  "application/json",
		}, requestBody)
		if err != nil {
			return nil, err
		}
		var parsed map[string]any
		if err := json.Unmarshal(body, &parsed); err != nil {
			return nil, err
		}
		if success, _ := parsed["success"].(bool); !success {
			return nil, fmt.Errorf("Scraping failed for %s: %v", rawURL, parsed["error"])
		}
		return parsed, nil
	})
	if err != nil {
		return "", fmt.Errorf("Error reading web page: %v", err)
	}

	// Missing or malformed response fields degrade to zero values rather
	// than erroring; only a fully empty markdown body gets a placeholder.
	dataMap, _ := data["data"].(map[string]any)
	metadata, _ := dataMap["metadata"].(map[string]any)
	title, _ := metadata["title"].(string)
	markdown, _ := dataMap["markdown"].(string)
	if markdown == "" {
		markdown = "No content extracted"
	}
	return formatReadLink(title, rawURL, markdown), nil
}
|
|
178
|
+
|
|
179
|
+
func MapSite(rawURL string) (string, error) {
|
|
180
|
+
apiKey := strings.TrimSpace(os.Getenv("FIRECRAWL_API_KEY"))
|
|
181
|
+
if apiKey == "" {
|
|
182
|
+
return "", errors.New("Error mapping website: FIRECRAWL_API_KEY environment variable is required")
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
requestBody := map[string]any{
|
|
186
|
+
"url": rawURL,
|
|
187
|
+
"sitemap": "include",
|
|
188
|
+
"includeSubdomains": true,
|
|
189
|
+
"ignoreQueryParameters": true,
|
|
190
|
+
"limit": 5000,
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
|
194
|
+
defer cancel()
|
|
195
|
+
body, err := doJSONRequest(ctx, http.MethodPost, "https://api.firecrawl.dev/v2/map", map[string]string{
|
|
196
|
+
"Authorization": "Bearer " + apiKey,
|
|
197
|
+
"Content-Type": "application/json",
|
|
198
|
+
}, requestBody)
|
|
199
|
+
if err != nil {
|
|
200
|
+
return "", fmt.Errorf("Error mapping website: %v", err)
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
var parsed map[string]any
|
|
204
|
+
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
205
|
+
return "", fmt.Errorf("Error mapping website: %v", err)
|
|
206
|
+
}
|
|
207
|
+
if success, _ := parsed["success"].(bool); !success {
|
|
208
|
+
return "", fmt.Errorf("Error mapping website: Mapping failed for %s: %v", rawURL, parsed["error"])
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
linksAny, _ := parsed["links"].([]any)
|
|
212
|
+
parts := []string{fmt.Sprintf("total urls found: %d", len(linksAny)), ""}
|
|
213
|
+
for _, item := range linksAny {
|
|
214
|
+
switch link := item.(type) {
|
|
215
|
+
case string:
|
|
216
|
+
parts = append(parts, "- "+link, "")
|
|
217
|
+
case map[string]any:
|
|
218
|
+
parts = append(parts, "- "+stringValue(link["url"]))
|
|
219
|
+
if v := stringValue(link["title"]); v != "" {
|
|
220
|
+
parts = append(parts, "- "+v)
|
|
221
|
+
}
|
|
222
|
+
if v := stringValue(link["description"]); v != "" {
|
|
223
|
+
parts = append(parts, "- "+v)
|
|
224
|
+
}
|
|
225
|
+
parts = append(parts, "")
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
return strings.Join(parts, "\n"), nil
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// formatReadLink renders a fetched page as markdown: an optional "# title"
// heading, the source URL, then the page body.
func formatReadLink(title, rawURL, markdown string) string {
	var sections []string
	if strings.TrimSpace(title) != "" {
		sections = append(sections, "# "+title, "")
	}
	sections = append(sections, "**URL:** "+rawURL, "", markdown)
	return strings.Join(sections, "\n")
}
|
|
239
|
+
|
|
240
|
+
func searchWithBrave(ctx context.Context, query string) (SearchResult, error) {
|
|
241
|
+
apiKey := strings.TrimSpace(os.Getenv("BRAVE_API_KEY"))
|
|
242
|
+
if apiKey == "" {
|
|
243
|
+
return SearchResult{}, errors.New("missing BRAVE_API_KEY")
|
|
244
|
+
}
|
|
245
|
+
params := url.Values{}
|
|
246
|
+
params.Set("q", query)
|
|
247
|
+
params.Set("text_decorations", "false")
|
|
248
|
+
params.Set("result_filter", "web")
|
|
249
|
+
params.Set("limit", "20")
|
|
250
|
+
body, err := doRawRequest(ctx, http.MethodGet, "https://api.search.brave.com/res/v1/web/search?"+params.Encode(), map[string]string{
|
|
251
|
+
"Accept": "application/json",
|
|
252
|
+
"Accept-Encoding": "gzip",
|
|
253
|
+
"x-subscription-token": apiKey,
|
|
254
|
+
}, nil)
|
|
255
|
+
if err != nil {
|
|
256
|
+
return SearchResult{}, err
|
|
257
|
+
}
|
|
258
|
+
var parsed struct {
|
|
259
|
+
Web struct {
|
|
260
|
+
Results []struct {
|
|
261
|
+
URL string `json:"url"`
|
|
262
|
+
Title string `json:"title"`
|
|
263
|
+
Description string `json:"description"`
|
|
264
|
+
} `json:"results"`
|
|
265
|
+
} `json:"web"`
|
|
266
|
+
}
|
|
267
|
+
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
268
|
+
return SearchResult{}, err
|
|
269
|
+
}
|
|
270
|
+
docs := make([]SearchDoc, 0, min(20, len(parsed.Web.Results)))
|
|
271
|
+
for i, r := range parsed.Web.Results {
|
|
272
|
+
if i >= 20 {
|
|
273
|
+
break
|
|
274
|
+
}
|
|
275
|
+
docs = append(docs, SearchDoc{URL: r.URL, Title: r.Title, Overview: r.Description})
|
|
276
|
+
}
|
|
277
|
+
return SearchResult{Docs: docs}, nil
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
func searchWithTavily(ctx context.Context, query string, excludeDomains []string) (SearchResult, error) {
|
|
281
|
+
apiKey := strings.TrimSpace(os.Getenv("TAVILY_API_KEY"))
|
|
282
|
+
if apiKey == "" {
|
|
283
|
+
return SearchResult{}, errors.New("missing TAVILY_API_KEY")
|
|
284
|
+
}
|
|
285
|
+
requestBody := map[string]any{
|
|
286
|
+
"api_key": apiKey,
|
|
287
|
+
"query": query,
|
|
288
|
+
"max_results": 20,
|
|
289
|
+
}
|
|
290
|
+
if len(excludeDomains) > 0 {
|
|
291
|
+
requestBody["exclude_domains"] = excludeDomains
|
|
292
|
+
}
|
|
293
|
+
body, err := doJSONRequest(ctx, http.MethodPost, "https://api.tavily.com/search", map[string]string{
|
|
294
|
+
"Content-Type": "application/json",
|
|
295
|
+
"Accept": "application/json",
|
|
296
|
+
}, requestBody)
|
|
297
|
+
if err != nil {
|
|
298
|
+
return SearchResult{}, err
|
|
299
|
+
}
|
|
300
|
+
var parsed struct {
|
|
301
|
+
Results []struct {
|
|
302
|
+
URL string `json:"url"`
|
|
303
|
+
Title string `json:"title"`
|
|
304
|
+
Content string `json:"content"`
|
|
305
|
+
} `json:"results"`
|
|
306
|
+
}
|
|
307
|
+
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
308
|
+
return SearchResult{}, err
|
|
309
|
+
}
|
|
310
|
+
docs := make([]SearchDoc, 0, min(20, len(parsed.Results)))
|
|
311
|
+
for i, r := range parsed.Results {
|
|
312
|
+
if i >= 20 {
|
|
313
|
+
break
|
|
314
|
+
}
|
|
315
|
+
docs = append(docs, SearchDoc{URL: r.URL, Title: r.Title, Overview: r.Content})
|
|
316
|
+
}
|
|
317
|
+
return SearchResult{Docs: docs}, nil
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
func searchWithExa(ctx context.Context, query string, excludeDomains []string, includeKeyword string) (SearchResult, error) {
|
|
321
|
+
apiKey := strings.TrimSpace(os.Getenv("EXA_API_KEY"))
|
|
322
|
+
if apiKey == "" {
|
|
323
|
+
return SearchResult{}, errors.New("missing EXA_API_KEY")
|
|
324
|
+
}
|
|
325
|
+
requestBody := map[string]any{
|
|
326
|
+
"query": query,
|
|
327
|
+
"type": "auto",
|
|
328
|
+
"numResults": 25,
|
|
329
|
+
"contents": map[string]any{
|
|
330
|
+
"livecrawl": "preferred",
|
|
331
|
+
},
|
|
332
|
+
}
|
|
333
|
+
if len(excludeDomains) > 0 {
|
|
334
|
+
requestBody["excludeDomains"] = excludeDomains
|
|
335
|
+
}
|
|
336
|
+
if strings.TrimSpace(includeKeyword) != "" {
|
|
337
|
+
requestBody["includeText"] = []string{includeKeyword}
|
|
338
|
+
}
|
|
339
|
+
body, err := doJSONRequest(ctx, http.MethodPost, "https://api.exa.ai/search", map[string]string{
|
|
340
|
+
"Accept": "application/json",
|
|
341
|
+
"Content-Type": "application/json",
|
|
342
|
+
"x-api-key": apiKey,
|
|
343
|
+
}, requestBody)
|
|
344
|
+
if err != nil {
|
|
345
|
+
return SearchResult{}, err
|
|
346
|
+
}
|
|
347
|
+
var parsed struct {
|
|
348
|
+
Results []struct {
|
|
349
|
+
URL string `json:"url"`
|
|
350
|
+
Title string `json:"title"`
|
|
351
|
+
Text string `json:"text"`
|
|
352
|
+
Summary string `json:"summary"`
|
|
353
|
+
} `json:"results"`
|
|
354
|
+
}
|
|
355
|
+
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
356
|
+
return SearchResult{}, err
|
|
357
|
+
}
|
|
358
|
+
docs := make([]SearchDoc, 0, min(25, len(parsed.Results)))
|
|
359
|
+
for i, r := range parsed.Results {
|
|
360
|
+
if i >= 25 {
|
|
361
|
+
break
|
|
362
|
+
}
|
|
363
|
+
overview := r.Text
|
|
364
|
+
if overview == "" {
|
|
365
|
+
overview = r.Summary
|
|
366
|
+
}
|
|
367
|
+
docs = append(docs, SearchDoc{URL: r.URL, Title: r.Title, Overview: overview})
|
|
368
|
+
}
|
|
369
|
+
return SearchResult{Docs: docs}, nil
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
// scoreAndRankResults merges per-provider result lists into one list
// ordered by a weighted score. URLs are de-duplicated after
// normalization; appearing in multiple provider lists earns a bonus
// (larger when the URL ranked in a top-5 position somewhere), with a
// small penalty past three duplicates. Returns the ranked docs and the
// number of duplicates removed.
func scoreAndRankResults(providerResults []providerDocs) ([]SearchDoc, int) {
	urlScores := map[string]*scoredURL{}
	totalResultsBeforeDedup := 0

	for _, providerResult := range providerResults {
		totalResultsBeforeDedup += len(providerResult.Docs)
		for idx, doc := range providerResult.Docs {
			normalized := normalizeURL(doc.URL)
			position := idx + 1 // 1-based rank within this provider's list
			weightedScore := float64(getPositionPoints(position)) * getProviderWeight(providerResult.Provider)
			if existing, ok := urlScores[normalized]; ok {
				existing.TotalScore += weightedScore
				existing.DuplicateCount++
				// Keep the doc copy from whichever provider ranked it best.
				if position < existing.BestPosition {
					existing.BestPosition = position
					existing.Result = doc
				}
				continue
			}
			urlScores[normalized] = &scoredURL{Result: doc, TotalScore: weightedScore, DuplicateCount: 1, BestPosition: position}
		}
	}

	scored := make([]*scoredURL, 0, len(urlScores))
	for _, item := range urlScores {
		// Cross-provider agreement is worth more near the top of a list.
		duplicateBonus := 3.0
		if item.BestPosition <= 5 {
			duplicateBonus = 5.0
		}
		duplicatePenalty := 0.0
		if item.DuplicateCount > 3 {
			duplicatePenalty = -2.0
		}
		item.FinalScore = item.TotalScore + float64(item.DuplicateCount-1)*duplicateBonus + duplicatePenalty
		scored = append(scored, item)
	}

	// Sort by score descending; ties break alphabetically by URL so the
	// output is deterministic despite random map iteration order.
	sort.SliceStable(scored, func(i, j int) bool {
		if scored[i].FinalScore == scored[j].FinalScore {
			return scored[i].Result.URL < scored[j].Result.URL
		}
		return scored[i].FinalScore > scored[j].FinalScore
	})

	results := make([]SearchDoc, 0, len(scored))
	for _, item := range scored {
		results = append(results, item.Result)
	}
	return results, totalResultsBeforeDedup - len(scored)
}
|
|
422
|
+
|
|
423
|
+
// positionPoints maps 1-based result positions to base relevance points;
// index 0 is a placeholder. Positions outside the table score a floor of 1.
var positionPoints = [...]int{0, 30, 27, 24, 21, 19, 16, 13, 11, 9, 7, 5, 4, 3, 2}

// getPositionPoints returns the base score for a 1-based result position.
// The original rebuilt a constant map on every call; a package-level
// array lookup avoids the per-call allocation and hashing.
func getPositionPoints(position int) int {
	if position >= 1 && position < len(positionPoints) {
		return positionPoints[position]
	}
	return 1
}
|
|
430
|
+
|
|
431
|
+
// getProviderWeight returns the score multiplier for a provider's results.
// "Ref" results are boosted; every other provider (known or not) is 1.0.
func getProviderWeight(provider string) float64 {
	if provider == "Ref" {
		return 1.25
	}
	return 1.0
}
|
|
441
|
+
|
|
442
|
+
func filterExcludedDomains(providerResults []providerDocs, excludeDomains []string) []providerDocs {
|
|
443
|
+
if len(excludeDomains) == 0 {
|
|
444
|
+
return providerResults
|
|
445
|
+
}
|
|
446
|
+
normalizedExclude := make(map[string]struct{}, len(excludeDomains))
|
|
447
|
+
for _, domain := range excludeDomains {
|
|
448
|
+
normalizedExclude[strings.TrimPrefix(strings.ToLower(strings.TrimSpace(domain)), "www.")] = struct{}{}
|
|
449
|
+
}
|
|
450
|
+
filtered := make([]providerDocs, 0, len(providerResults))
|
|
451
|
+
for _, providerResult := range providerResults {
|
|
452
|
+
docs := make([]SearchDoc, 0, len(providerResult.Docs))
|
|
453
|
+
for _, doc := range providerResult.Docs {
|
|
454
|
+
if _, blocked := normalizedExclude[extractDomain(doc.URL)]; !blocked {
|
|
455
|
+
docs = append(docs, doc)
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
filtered = append(filtered, providerDocs{Provider: providerResult.Provider, Docs: docs})
|
|
459
|
+
}
|
|
460
|
+
return filtered
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// normalizeURL canonicalizes a URL for de-duplication: lowercases the
// scheme/host/path, strips a trailing slash, and removes common tracking
// query parameters. Unparseable or host-less input falls back to a crude
// lowercase/strip of the raw string.
func normalizeURL(raw string) string {
	u, err := url.Parse(raw)
	if err != nil || u.Host == "" {
		// Fallback: lowercase, trim a trailing slash, drop any query.
		lowered := strings.TrimSuffix(strings.ToLower(raw), "/")
		before, _, _ := strings.Cut(lowered, "?")
		return before
	}
	base := strings.ToLower(u.Scheme + "://" + u.Host + strings.TrimSuffix(u.EscapedPath(), "/"))
	kept := url.Values{}
	for key, values := range u.Query() {
		switch strings.ToLower(key) {
		case "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "ref", "fbclid", "gclid":
			continue // tracking parameter — not part of the page identity
		}
		for _, value := range values {
			kept.Add(key, value)
		}
	}
	if query := kept.Encode(); query != "" {
		return base + "?" + query
	}
	return base
}
|
|
484
|
+
|
|
485
|
+
// extractDomain returns the lowercase hostname of a URL with any leading
// "www." removed. When the input does not parse as a URL with a host,
// the scheme/path/query are stripped by hand.
func extractDomain(raw string) string {
	if u, err := url.Parse(raw); err == nil && u.Hostname() != "" {
		return strings.TrimPrefix(strings.ToLower(u.Hostname()), "www.")
	}
	host := strings.ToLower(raw)
	host = strings.TrimPrefix(host, "https://")
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "www.")
	host, _, _ = strings.Cut(host, "/")
	host, _, _ = strings.Cut(host, "?")
	return host
}
|
|
494
|
+
|
|
495
|
+
// htmlEntityReplacer decodes the small set of HTML entities search
// providers commonly leave in titles and snippets. Package-level so the
// Replacer (and its internal trie) is built once, not on every call.
// NOTE(review): the published source shows already-decoded (garbled)
// entity literals, including an invalid `"""` token that cannot compile;
// the entity names below are reconstructed — confirm against upstream.
var htmlEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&#39;", "'",
	"&#x27;", "'",
	"&apos;", "'",
)

// decodeHTML replaces common HTML entities in text with their characters.
func decodeHTML(text string) string {
	return htmlEntityReplacer.Replace(text)
}
|
|
499
|
+
|
|
500
|
+
// truncateWords collapses whitespace in s and keeps at most maxWords
// words, joined with single spaces.
func truncateWords(s string, maxWords int) string {
	// strings.Fields already ignores leading/trailing whitespace.
	words := strings.Fields(s)
	if len(words) > maxWords {
		words = words[:maxWords]
	}
	return strings.Join(words, " ")
}
|
|
507
|
+
|
|
508
|
+
func doJSONRequest(ctx context.Context, method, rawURL string, headers map[string]string, payload any) ([]byte, error) {
|
|
509
|
+
bodyBytes, err := json.Marshal(payload)
|
|
510
|
+
if err != nil {
|
|
511
|
+
return nil, err
|
|
512
|
+
}
|
|
513
|
+
return doRawRequest(ctx, method, rawURL, headers, bodyBytes)
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// doRawRequest performs an HTTP request with the given headers and
// optional body, returning the response body bytes. Non-2xx responses
// are surfaced as errors carrying the status and any response text.
// Cancellation/timeouts come from ctx; no additional client timeout is set.
func doRawRequest(ctx context.Context, method, rawURL string, headers map[string]string, body []byte) ([]byte, error) {
	var reader io.Reader
	if body != nil {
		// bytes.NewReader reads the slice directly; the original
		// strings.NewReader(string(body)) copied the whole body first.
		reader = bytes.NewReader(body)
	}
	req, err := http.NewRequestWithContext(ctx, method, rawURL, reader)
	if err != nil {
		return nil, err
	}
	for k, v := range headers {
		req.Header.Set(k, v)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		if len(respBody) > 0 {
			return nil, fmt.Errorf("API request failed: %s - %s", resp.Status, strings.TrimSpace(string(respBody)))
		}
		return nil, fmt.Errorf("API request failed: %s", resp.Status)
	}
	return respBody, nil
}
|
|
545
|
+
|
|
546
|
+
// stringValue returns v as a string, or "" when v is not a string.
func stringValue(v any) string {
	s, _ := v.(string)
	return s
}
|
|
552
|
+
|
|
553
|
+
// min returns the smaller of two ints. (Kept as a helper since the
// module's minimum Go version is not known to include the builtin.)
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
package buildinfo
|
|
2
|
+
|
|
3
|
+
import "strings"
|
|
4
|
+
|
|
5
|
+
// defaultVersion is reported when no version was injected at build time.
const defaultVersion = "dev"

// Version is overridden at build time via linker flags.
var Version = defaultVersion

// CurrentVersion returns the effective version string, falling back to
// defaultVersion when the injected value is empty or only whitespace.
func CurrentVersion() string {
	if v := strings.TrimSpace(Version); v != "" {
		return v
	}
	return defaultVersion
}
|
package/package.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "webctx",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Pure Go web search and browsing CLI using Brave, Tavily, Exa, and Firecrawl",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "amxv",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/amxv/webctx.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://github.com/amxv/webctx#readme",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/amxv/webctx/issues"
|
|
14
|
+
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"webctx": "bin/webctx.js"
|
|
17
|
+
},
|
|
18
|
+
"config": {
|
|
19
|
+
"cliBinaryName": "webctx"
|
|
20
|
+
},
|
|
21
|
+
"files": [
|
|
22
|
+
"bin/webctx.js",
|
|
23
|
+
"scripts/postinstall.js",
|
|
24
|
+
"cmd",
|
|
25
|
+
"internal",
|
|
26
|
+
"go.mod",
|
|
27
|
+
"README.md",
|
|
28
|
+
"docs",
|
|
29
|
+
"LICENSE",
|
|
30
|
+
"AGENTS.md",
|
|
31
|
+
"CONTRIBUTORS.md",
|
|
32
|
+
"Makefile"
|
|
33
|
+
],
|
|
34
|
+
"scripts": {
|
|
35
|
+
"postinstall": "node scripts/postinstall.js",
|
|
36
|
+
"test": "node --check bin/webctx.js && node --check scripts/postinstall.js",
|
|
37
|
+
"lint": "npm run test"
|
|
38
|
+
},
|
|
39
|
+
"engines": {
|
|
40
|
+
"node": ">=18"
|
|
41
|
+
},
|
|
42
|
+
"keywords": [
|
|
43
|
+
"go",
|
|
44
|
+
"cli",
|
|
45
|
+
"search",
|
|
46
|
+
"web-search",
|
|
47
|
+
"firecrawl",
|
|
48
|
+
"exa",
|
|
49
|
+
"tavily",
|
|
50
|
+
"brave-search",
|
|
51
|
+
"agent-tools",
|
|
52
|
+
"npm",
|
|
53
|
+
"release"
|
|
54
|
+
]
|
|
55
|
+
}
|