crawler-user-agents 1.49.0 → 1.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ # Build a statically linked clf-filter binary and attach it to the GitHub release for each tag.
2
+
3
+ name: Release binary
4
+
5
+ on:
6
+ push:
7
+ tags:
8
+ - 'v*'
9
+
10
+ jobs:
11
+ release-binary:
12
+ runs-on: ubuntu-latest
13
+ permissions:
14
+ contents: write
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-go@v5
18
+ with:
19
+ go-version-file: go.mod
20
+ - name: Build static binary
21
+ run: CGO_ENABLED=0 go build -ldflags="-w -s" -o clf-filter ./cmd/clf-filter/
22
+ - name: Upload binary to release
23
+ uses: softprops/action-gh-release@v2
24
+ with:
25
+ files: clf-filter
@@ -0,0 +1,51 @@
1
+ // clf-filter reads Combined Log Format lines from stdin and writes them to stdout,
2
+ // removing bot/crawler lines by default. Use --bot to keep only bot lines.
3
+ package main
4
+
5
+ import (
6
+ "bufio"
7
+ "flag"
8
+ "fmt"
9
+ "os"
10
+ "strings"
11
+
12
+ agents "github.com/monperrus/crawler-user-agents"
13
+ )
14
+
15
// extractUserAgent returns the contents of the last double-quoted field of a
// log line, which in Combined Log Format (... "referer" "user-agent") is the
// user agent.
//
// The closing quote is the last '"' on the line. The opening quote is the
// nearest preceding '"' that is not backslash-escaped: loggers such as Apache
// httpd write a literal quote inside a field as \", and treating that as the
// field boundary would truncate the user agent (often to an empty string).
//
// The returned text is the raw field; escape sequences are not decoded.
// The boolean is false when the line has no complete quoted field.
func extractUserAgent(line string) (string, bool) {
	end := strings.LastIndexByte(line, '"')
	if end < 1 {
		// No quote at all, or the only quote is the first byte of the line.
		return "", false
	}
	// Walk backwards over candidate opening quotes, skipping escaped ones.
	for start := strings.LastIndexByte(line[:end], '"'); start >= 0; start = strings.LastIndexByte(line[:start], '"') {
		if !isEscapedAt(line, start) {
			return line[start+1 : end], true
		}
	}
	return "", false
}

// isEscapedAt reports whether the byte at index i of s is preceded by an odd
// number of consecutive backslashes, i.e. whether it is backslash-escaped.
// An even run (such as `\\`) is a sequence of escaped backslashes and leaves
// the byte at i unescaped.
func isEscapedAt(s string, i int) bool {
	backslashes := 0
	for j := i - 1; j >= 0 && s[j] == '\\'; j-- {
		backslashes++
	}
	return backslashes%2 == 1
}
28
+
29
+ func main() {
30
+ botOnly := flag.Bool("bot", false, "keep only bot/crawler lines (default: remove bots)")
31
+ flag.Parse()
32
+
33
+ scanner := bufio.NewScanner(os.Stdin)
34
+ // Support long lines (e.g. large URLs).
35
+ scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
36
+
37
+ for scanner.Scan() {
38
+ line := scanner.Text()
39
+ ua, ok := extractUserAgent(line)
40
+ isBot := ok && agents.IsCrawler(ua)
41
+
42
+ if *botOnly == isBot {
43
+ fmt.Println(line)
44
+ }
45
+ }
46
+
47
+ if err := scanner.Err(); err != nil {
48
+ fmt.Fprintln(os.Stderr, "clf-filter: read error:", err)
49
+ os.Exit(1)
50
+ }
51
+ }
@@ -18363,5 +18363,17 @@
18363
18363
  "tags": [
18364
18364
  "monitoring"
18365
18365
  ]
18366
+ },
18367
+ {
18368
+ "pattern": "GeedoShopProductFinder",
18369
+ "addition_date": "2026/06/17",
18370
+ "url": "https://geedo.com/product-finder/",
18371
+ "instances": [
18372
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; GeedoShopProductFinder) Chrome/142.0.0.0 Safari/537.36"
18373
+ ],
18374
+ "description": "GeedoShopProductFinder is the automated crawler used by Geedo, a global product-search engine specialized in online retail.",
18375
+ "tags": [
18376
+ "search-engine"
18377
+ ]
18366
18378
  }
18367
18379
  ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawler-user-agents",
3
- "version": "1.49.0",
3
+ "version": "1.51.0",
4
4
  "main": "crawler-user-agents.json",
5
5
  "typings": "./index.d.ts",
6
6
  "exports": {