webctx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ package app
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "strings"
7
+
8
+ "github.com/amxv/webctx/internal/buildinfo"
9
+ )
10
+
11
// commandName is the canonical CLI binary name.
// NOTE(review): not referenced by the code visible in this chunk — presumably
// used elsewhere in the package; confirm before removing.
const commandName = "webctx"

// version is the CLI version string, resolved once at startup from build metadata.
var version = buildinfo.CurrentVersion()
14
+
15
+ func Run(args []string, stdout, stderr io.Writer) int {
16
+ loadEnvLocal()
17
+
18
+ if len(args) == 0 || isHelpArg(args[0]) {
19
+ _, _ = fmt.Fprintln(stdout, usageText())
20
+ return 0
21
+ }
22
+
23
+ if args[0] == "--version" || args[0] == "-v" {
24
+ _, _ = fmt.Fprintln(stdout, version)
25
+ return 0
26
+ }
27
+
28
+ tool := args[0]
29
+ flags, positional := parseArgs(args[1:])
30
+ input := ""
31
+ if len(positional) > 0 {
32
+ input = positional[0]
33
+ }
34
+
35
+ switch tool {
36
+ case "search":
37
+ query := strings.Join(positional, " ")
38
+ if strings.TrimSpace(query) == "" {
39
+ _, _ = fmt.Fprintln(stderr, "Error: search requires a query")
40
+ _, _ = fmt.Fprintln(stdout, "Usage: webctx search <query> [--exclude domains] [--keyword phrase]")
41
+ return 1
42
+ }
43
+ excludeDomains := splitCSV(flags["exclude"])
44
+ text, err := Search(SearchParams{Query: query, ExcludeDomains: excludeDomains, IncludeKeyword: flags["keyword"]})
45
+ if err != nil {
46
+ _, _ = fmt.Fprintln(stderr, err.Error())
47
+ return 1
48
+ }
49
+ _, _ = fmt.Fprintln(stdout, text)
50
+ return 0
51
+ case "read-link":
52
+ if strings.TrimSpace(input) == "" {
53
+ _, _ = fmt.Fprintln(stderr, "Error: read-link requires a URL")
54
+ _, _ = fmt.Fprintln(stdout, "Usage: webctx read-link <url>")
55
+ return 1
56
+ }
57
+ text, err := ReadLink(input)
58
+ if err != nil {
59
+ _, _ = fmt.Fprintln(stderr, err.Error())
60
+ return 1
61
+ }
62
+ _, _ = fmt.Fprintln(stdout, text)
63
+ return 0
64
+ case "map-site":
65
+ if strings.TrimSpace(input) == "" {
66
+ _, _ = fmt.Fprintln(stderr, "Error: map-site requires a URL")
67
+ _, _ = fmt.Fprintln(stdout, "Usage: webctx map-site <url>")
68
+ return 1
69
+ }
70
+ text, err := MapSite(input)
71
+ if err != nil {
72
+ _, _ = fmt.Fprintln(stderr, err.Error())
73
+ return 1
74
+ }
75
+ _, _ = fmt.Fprintln(stdout, text)
76
+ return 0
77
+ default:
78
+ _, _ = fmt.Fprintln(stderr, "Unknown tool:", tool)
79
+ _, _ = fmt.Fprintln(stdout, usageText())
80
+ return 1
81
+ }
82
+ }
83
+
84
+ func usageText() string {
85
+ return fmt.Sprintf(`webctx v%s - Web search & browsing CLI
86
+
87
+ Usage:
88
+ webctx search <query> [--exclude domain1,domain2] [--keyword phrase]
89
+ webctx read-link <url>
90
+ webctx map-site <url>
91
+
92
+ Examples:
93
+ webctx search "next.js server components"
94
+ webctx search "react hooks" --exclude youtube.com,vimeo.com
95
+ webctx search "drizzle orm" --keyword "migration guide"
96
+ webctx read-link https://docs.example.com/guide
97
+ webctx map-site https://example.com`, version)
98
+ }
99
+
100
// parseArgs splits args into a flag map and positional arguments.
//
// Flags may be written either as "--name value" (value in the following
// argument) or "--name=value". A trailing "--name" with no value is kept as a
// positional argument, matching the previous behavior. Earlier code treated
// "--name=value" as a flag literally named "name=value" and consumed the NEXT
// argument as its value; accepting the "=" form fixes that.
func parseArgs(args []string) (map[string]string, []string) {
	flags := map[string]string{}
	positional := make([]string, 0, len(args))
	for i := 0; i < len(args); i++ {
		arg := args[i]
		if !strings.HasPrefix(arg, "--") {
			positional = append(positional, arg)
			continue
		}
		name := strings.TrimPrefix(arg, "--")
		// "--flag=value" form: value is embedded in the same argument.
		if key, value, ok := strings.Cut(name, "="); ok {
			flags[key] = value
			continue
		}
		// "--flag value" form: value is the next argument, if any.
		if i+1 < len(args) {
			flags[name] = args[i+1]
			i++
			continue
		}
		// A bare "--flag" at the end has no value; keep it positional.
		positional = append(positional, arg)
	}
	return flags, positional
}
113
+
114
// splitCSV parses a comma-separated list, trimming whitespace around each
// entry and dropping empties. It returns nil when nothing remains.
func splitCSV(v string) []string {
	if strings.TrimSpace(v) == "" {
		return nil
	}
	var items []string
	for _, piece := range strings.Split(v, ",") {
		if cleaned := strings.TrimSpace(piece); cleaned != "" {
			items = append(items, cleaned)
		}
	}
	return items
}
131
+
132
// isHelpArg reports whether v is one of the recognized help invocations.
func isHelpArg(v string) bool {
	return v == "-h" || v == "--help" || v == "help"
}
@@ -0,0 +1,77 @@
1
+ package app
2
+
3
+ import (
4
+ "bytes"
5
+ "strings"
6
+ "testing"
7
+ )
8
+
9
+ func TestRunRootHelp(t *testing.T) {
10
+ var out bytes.Buffer
11
+ var errBuf bytes.Buffer
12
+
13
+ code := Run([]string{"--help"}, &out, &errBuf)
14
+ if code != 0 {
15
+ t.Fatalf("Run returned code %d", code)
16
+ }
17
+ if !strings.Contains(out.String(), "webctx v") || !strings.Contains(out.String(), "read-link") {
18
+ t.Fatalf("unexpected help output: %q", out.String())
19
+ }
20
+ }
21
+
22
+ func TestRunVersion(t *testing.T) {
23
+ var out bytes.Buffer
24
+ var errBuf bytes.Buffer
25
+
26
+ code := Run([]string{"--version"}, &out, &errBuf)
27
+ if code != 0 {
28
+ t.Fatalf("Run returned code %d", code)
29
+ }
30
+ if strings.TrimSpace(out.String()) == "" {
31
+ t.Fatalf("unexpected empty version output")
32
+ }
33
+ if strings.Contains(out.String(), "webctx ") {
34
+ t.Fatalf("expected bare version output, got: %q", out.String())
35
+ }
36
+ }
37
+
38
+ func TestRunSearchWithoutQuery(t *testing.T) {
39
+ var out bytes.Buffer
40
+ var errBuf bytes.Buffer
41
+
42
+ code := Run([]string{"search"}, &out, &errBuf)
43
+ if code != 1 {
44
+ t.Fatalf("expected exit code 1, got %d", code)
45
+ }
46
+ if !strings.Contains(errBuf.String(), "search requires a query") {
47
+ t.Fatalf("unexpected stderr: %q", errBuf.String())
48
+ }
49
+ }
50
+
51
+ func TestNormalizeURL(t *testing.T) {
52
+ got := normalizeURL("https://Example.com/docs/?utm_source=x&ref=y&q=go")
53
+ want := "https://example.com/docs?q=go"
54
+ if got != want {
55
+ t.Fatalf("normalizeURL mismatch: got %q want %q", got, want)
56
+ }
57
+ }
58
+
59
+ func TestScoreAndRankResultsDuplicateBoost(t *testing.T) {
60
+ results, duplicatesRemoved := scoreAndRankResults([]providerDocs{
61
+ {Provider: "Brave", Docs: []SearchDoc{{URL: "https://a.com", Title: "A"}, {URL: "https://b.com", Title: "B"}}},
62
+ {Provider: "Tavily", Docs: []SearchDoc{{URL: "https://b.com", Title: "B2"}, {URL: "https://c.com", Title: "C"}}},
63
+ })
64
+ if duplicatesRemoved != 1 {
65
+ t.Fatalf("expected 1 duplicate removed, got %d", duplicatesRemoved)
66
+ }
67
+ if len(results) == 0 || results[0].URL != "https://b.com" {
68
+ t.Fatalf("expected duplicate URL to rank first, got %#v", results)
69
+ }
70
+ }
71
+
72
+ func TestParseGitHubURL(t *testing.T) {
73
+ info := parseGitHubURL("https://github.com/amxv/webctx-ts/blob/main/cli.ts")
74
+ if info == nil || !info.IsFile || info.Owner != "amxv" || info.Repo != "webctx-ts" || info.Branch != "main" || info.Path != "cli.ts" {
75
+ t.Fatalf("unexpected parse result: %#v", info)
76
+ }
77
+ }
@@ -0,0 +1,310 @@
1
+ package app
2
+
3
+ import (
4
+ "context"
5
+ "fmt"
6
+ "io"
7
+ "net/http"
8
+ "net/url"
9
+ "os"
10
+ "path/filepath"
11
+ "strings"
12
+ "sync"
13
+ "time"
14
+ )
15
+
16
// MarkdownResult is the markdown content extracted from a single fetched page.
type MarkdownResult struct {
	URL      string // the original URL the content was requested for
	Title    string // first "# " heading in the markdown, or a fallback name
	Markdown string // raw markdown body as fetched
}
21
+
22
// githubURLInfo is the decomposition of a github.com repository URL.
type githubURLInfo struct {
	Owner  string // repository owner (first path segment)
	Repo   string // repository name (second path segment)
	Branch string // branch/ref from a blob/tree URL; empty for bare repo URLs
	Path   string // in-repo path after the branch; empty for bare repo URLs
	IsFile bool   // true for bare repo and blob URLs, false for tree URLs
}

// parseGitHubURL decomposes a github.com URL into its parts. It returns nil
// for non-GitHub hosts, URLs with fewer than two path segments, and URL kinds
// other than bare repo, "blob", or "tree" (e.g. issues or pull requests).
func parseGitHubURL(raw string) *githubURLInfo {
	u, err := url.Parse(raw)
	if err != nil || u.Hostname() != "github.com" {
		return nil
	}
	// FieldsFunc drops empty segments, so leading/duplicate slashes are safe.
	segments := strings.FieldsFunc(u.Path, func(r rune) bool { return r == '/' })
	if len(segments) < 2 {
		return nil
	}
	info := &githubURLInfo{Owner: segments[0], Repo: segments[1], IsFile: true}
	if len(segments) == 2 {
		// Bare repository URL: treated as a file (its README).
		return info
	}
	switch segments[2] {
	case "blob", "tree":
		info.IsFile = segments[2] == "blob"
		if len(segments) >= 4 {
			info.Branch = segments[3]
		}
		if len(segments) >= 5 {
			info.Path = strings.Join(segments[4:], "/")
		}
		return info
	default:
		// Any other URL kind (issues, pulls, releases, ...) is unsupported.
		return nil
	}
}
65
+
66
+ func convertToRawGitHubURL(info *githubURLInfo) string {
67
+ if info.Path == "" {
68
+ return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/HEAD/README.md", info.Owner, info.Repo)
69
+ }
70
+ branch := info.Branch
71
+ if branch == "" {
72
+ branch = "HEAD"
73
+ }
74
+ return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", info.Owner, info.Repo, branch, info.Path)
75
+ }
76
+
77
// fetchGitHubRawContent resolves a github.com URL to its
// raw.githubusercontent.com equivalent and downloads it. It is best-effort:
// non-GitHub URLs, tree URLs, and any download failure all yield (nil, nil)
// rather than an error, so callers can fall back to another fetch strategy.
func fetchGitHubRawContent(raw string) (*MarkdownResult, error) {
	info := parseGitHubURL(raw)
	if info == nil || !info.IsFile {
		return nil, nil
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	content, status, err := fetchText(ctx, convertToRawGitHubURL(info))
	if err != nil || status < 200 || status >= 300 {
		// A bare repo URL defaults to README.md; on a 404, retry common
		// alternative README spellings. NOTE(review): all retries share the
		// single 10s context created above, so the total budget is 10s.
		if info.Path == "" && status == http.StatusNotFound {
			for _, alt := range []string{"readme.md", "Readme.md", "README"} {
				altURL := fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/HEAD/%s", info.Owner, info.Repo, alt)
				content, status, err = fetchText(ctx, altURL)
				if err == nil && status >= 200 && status < 300 {
					title := firstHeadingOrFallback(content, info.Owner+"/"+info.Repo)
					return &MarkdownResult{URL: raw, Title: title, Markdown: content}, nil
				}
			}
		}
		// Deliberately swallow the failure (best-effort fetcher).
		return nil, nil
	}
	title := firstHeadingOrFallback(content, fallbackGitHubTitle(info))
	return &MarkdownResult{URL: raw, Title: title, Markdown: content}, nil
}
101
+
102
// checkMarkdownAvailable issues a HEAD request for the ".md" variant of raw
// (appending the extension when missing) and reports whether the response
// looks like usable markdown: a 2xx status, a markdown or text/plain
// Content-Type, and a Content-Length above 50 bytes.
func checkMarkdownAvailable(raw string) (bool, error) {
	target := raw
	if !strings.HasSuffix(strings.ToLower(target), ".md") {
		target += ".md"
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, target, nil)
	if err != nil {
		return false, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return false, nil
	}
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
	if !strings.Contains(contentType, "markdown") && !strings.Contains(contentType, "text/plain") {
		return false, nil
	}
	// NOTE(review): ContentLength is -1 when the server omits the header,
	// which this treats as "not available".
	return resp.ContentLength > 50, nil
}
125
+
126
+ func fetchMarkdownContent(raw string) (*MarkdownResult, error) {
127
+ mdURL := raw
128
+ if !strings.HasSuffix(strings.ToLower(mdURL), ".md") {
129
+ mdURL += ".md"
130
+ }
131
+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
132
+ defer cancel()
133
+ content, status, err := fetchText(ctx, mdURL)
134
+ if err != nil {
135
+ return nil, err
136
+ }
137
+ if status < 200 || status >= 300 {
138
+ return nil, fmt.Errorf("Failed to fetch markdown: %d", status)
139
+ }
140
+ title := firstHeadingOrFallback(content, filepath.Base(raw))
141
+ return &MarkdownResult{URL: raw, Title: title, Markdown: content}, nil
142
+ }
143
+
144
// fetchText GETs rawURL and returns the response body as a string together
// with the HTTP status code. The status code is also returned alongside a
// body-read error so callers can still inspect it.
func fetchText(ctx context.Context, rawURL string) (string, int, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", 0, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", 0, err
	}
	defer resp.Body.Close()

	payload, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return "", resp.StatusCode, readErr
	}
	return string(payload), resp.StatusCode, nil
}
160
+
161
// firstHeadingOrFallback returns the text of the first level-one markdown
// heading ("# ..."). When no heading exists it returns fallback, or
// "Document" when fallback is blank.
func firstHeadingOrFallback(markdown, fallback string) string {
	for _, raw := range strings.Split(markdown, "\n") {
		line := strings.TrimSpace(raw)
		if strings.HasPrefix(line, "# ") {
			return strings.TrimSpace(line[len("# "):])
		}
	}
	if strings.TrimSpace(fallback) != "" {
		return fallback
	}
	return "Document"
}
173
+
174
+ func fallbackGitHubTitle(info *githubURLInfo) string {
175
+ if info.Path != "" {
176
+ parts := strings.Split(info.Path, "/")
177
+ return parts[len(parts)-1]
178
+ }
179
+ return info.Owner + "/" + info.Repo
180
+ }
181
+
182
// tokenBucketRateLimiter is a mutex-guarded token bucket: each request
// consumes one token, and tokens are replenished at refillRate per
// refillInterval up to maxTokens.
type tokenBucketRateLimiter struct {
	mu             sync.Mutex    // guards all fields below
	tokens         int           // currently available tokens
	maxTokens      int           // bucket capacity
	refillRate     int           // tokens added per elapsed interval
	refillInterval time.Duration // length of one refill interval
	lastRefill     time.Time     // start of the current (partial) interval
}
190
+
191
+ func newFirecrawlRateLimiter() *tokenBucketRateLimiter {
192
+ return &tokenBucketRateLimiter{tokens: 10, maxTokens: 10, refillRate: 1, refillInterval: 6 * time.Second, lastRefill: time.Now()}
193
+ }
194
+
195
// refill credits refillRate tokens for every whole refillInterval elapsed
// since lastRefill, capping at maxTokens. The caller must hold r.mu.
func (r *tokenBucketRateLimiter) refill() {
	now := time.Now()
	elapsed := now.Sub(r.lastRefill)
	if elapsed < r.refillInterval {
		return
	}
	intervals := int(elapsed / r.refillInterval)
	if intervals <= 0 {
		return
	}
	r.tokens += intervals * r.refillRate
	if r.tokens > r.maxTokens {
		r.tokens = r.maxTokens
	}
	// Advance lastRefill by whole intervals only, so any fractional
	// remainder keeps accruing toward the next token.
	r.lastRefill = r.lastRefill.Add(time.Duration(intervals) * r.refillInterval)
}
211
+
212
// acquire blocks until a token is available or ctx is done, returning
// ctx.Err() in the latter case. It polls: when the bucket is empty it sleeps
// until roughly the next refill boundary and retries.
func (r *tokenBucketRateLimiter) acquire(ctx context.Context) error {
	for {
		r.mu.Lock()
		r.refill()
		if r.tokens > 0 {
			r.tokens--
			r.mu.Unlock()
			return nil
		}
		// Time until the next refill boundary plus a small cushion;
		// computed while still holding the lock so lastRefill is stable.
		wait := r.refillInterval - time.Since(r.lastRefill) + 100*time.Millisecond
		r.mu.Unlock()
		// Clamp to a minimum so we never busy-spin on a stale/negative wait.
		if wait < 100*time.Millisecond {
			wait = 100 * time.Millisecond
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(wait):
		}
	}
}
233
+
234
// firecrawlQueue serializes Firecrawl API calls: mu admits one request at a
// time and rateLimiter spaces successive requests out.
type firecrawlQueue struct {
	rateLimiter *tokenBucketRateLimiter
	mu          sync.Mutex // held for the full duration of each request
}
238
+
239
// Process-wide Firecrawl queue singleton, created lazily on first use.
var (
	queueOnce sync.Once
	queueInst *firecrawlQueue
)

// getFirecrawlQueue returns the shared queue, constructing it exactly once.
func getFirecrawlQueue() *firecrawlQueue {
	queueOnce.Do(func() {
		queueInst = &firecrawlQueue{rateLimiter: newFirecrawlRateLimiter()}
	})
	return queueInst
}
250
+
251
// enqueue runs requestFn after acquiring a rate-limit token. The mutex keeps
// at most one request in flight at a time; the 90s budget covers both the
// wait for a token and (via the deadline) nothing of the request itself —
// requestFn is invoked without this context. The first parameter (a request
// label) is currently unused.
func (q *firecrawlQueue) enqueue(_ string, requestFn func() (map[string]any, error)) (map[string]any, error) {
	q.mu.Lock()
	defer q.mu.Unlock()
	ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer cancel()
	if err := q.rateLimiter.acquire(ctx); err != nil {
		return nil, err
	}
	return requestFn()
}
261
+
262
+ func loadEnvLocal() {
263
+ for _, candidate := range envLocalCandidates() {
264
+ loadDotEnvFile(candidate)
265
+ }
266
+ }
267
+
268
// envLocalCandidates returns the deduplicated list of .env.local locations to
// try, in priority order: the current working directory, the executable's
// directory, and the executable's parent directory.
func envLocalCandidates() []string {
	var paths []string
	if cwd, err := os.Getwd(); err == nil {
		paths = append(paths, filepath.Join(cwd, ".env.local"))
	}
	if exe, err := os.Executable(); err == nil {
		exeDir := filepath.Dir(exe)
		paths = append(paths,
			filepath.Join(exeDir, ".env.local"),
			filepath.Join(filepath.Dir(exeDir), ".env.local"),
		)
	}

	// Preserve order while dropping duplicates (cwd may equal the exe dir).
	seen := make(map[string]struct{}, len(paths))
	unique := make([]string, 0, len(paths))
	for _, p := range paths {
		if _, dup := seen[p]; dup {
			continue
		}
		seen[p] = struct{}{}
		unique = append(unique, p)
	}
	return unique
}
288
+
289
+ func loadDotEnvFile(path string) {
290
+ data, err := os.ReadFile(path)
291
+ if err != nil {
292
+ return
293
+ }
294
+ for _, line := range strings.Split(string(data), "\n") {
295
+ trimmed := strings.TrimSpace(line)
296
+ if trimmed == "" || strings.HasPrefix(trimmed, "#") {
297
+ continue
298
+ }
299
+ trimmed = strings.TrimPrefix(trimmed, "export ")
300
+ key, value, ok := strings.Cut(trimmed, "=")
301
+ if !ok {
302
+ continue
303
+ }
304
+ key = strings.TrimSpace(key)
305
+ value = strings.Trim(strings.TrimSpace(value), `"'`)
306
+ if key != "" {
307
+ _ = os.Setenv(key, value)
308
+ }
309
+ }
310
+ }