crawler-user-agents 1.49.0 → 1.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ # Build a statically linked clf-filter binary and attach it to the GitHub release for each pushed tag matching 'v*'.
2
+
3
+ name: Release binary
4
+
5
+ on:
6
+ push:
7
+ tags:
8
+ - 'v*'
9
+
10
+ jobs:
11
+ release-binary:
12
+ runs-on: ubuntu-latest
13
+ permissions:
14
+ contents: write
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-go@v5
18
+ with:
19
+ go-version-file: go.mod
20
+ - name: Build static binary
21
+ run: CGO_ENABLED=0 go build -ldflags="-w -s" -o clf-filter ./cmd/clf-filter/
22
+ - name: Upload binary to release
23
+ uses: softprops/action-gh-release@v2
24
+ with:
25
+ files: clf-filter
@@ -0,0 +1,51 @@
1
+ // clf-filter reads Combined Log Format lines from stdin and writes them to stdout,
2
+ // removing bot/crawler lines by default. Use --bot to keep only bot lines.
3
+ package main
4
+
5
+ import (
6
+ "bufio"
7
+ "flag"
8
+ "fmt"
9
+ "os"
10
+ "strings"
11
+
12
+ agents "github.com/monperrus/crawler-user-agents"
13
+ )
14
+
15
+ func extractUserAgent(line string) (string, bool) {
16
+ // Combined Log Format ends with: "referer" "user-agent"
17
+ // Find the last quoted field.
18
+ end := strings.LastIndex(line, "\"")
19
+ if end < 1 {
20
+ return "", false
21
+ }
22
+ start := strings.LastIndex(line[:end], "\"")
23
+ if start < 0 {
24
+ return "", false
25
+ }
26
+ return line[start+1 : end], true
27
+ }
28
+
29
+ func main() {
30
+ botOnly := flag.Bool("bot", false, "keep only bot/crawler lines (default: remove bots)")
31
+ flag.Parse()
32
+
33
+ scanner := bufio.NewScanner(os.Stdin)
34
+ // Support long lines (e.g. large URLs).
35
+ scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
36
+
37
+ for scanner.Scan() {
38
+ line := scanner.Text()
39
+ ua, ok := extractUserAgent(line)
40
+ isBot := ok && agents.IsCrawler(ua)
41
+
42
+ if *botOnly == isBot {
43
+ fmt.Println(line)
44
+ }
45
+ }
46
+
47
+ if err := scanner.Err(); err != nil {
48
+ fmt.Fprintln(os.Stderr, "clf-filter: read error:", err)
49
+ os.Exit(1)
50
+ }
51
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawler-user-agents",
3
- "version": "1.49.0",
3
+ "version": "1.50.0",
4
4
  "main": "crawler-user-agents.json",
5
5
  "typings": "./index.d.ts",
6
6
  "exports": {