tansaku 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/tansaku +2 -1
- data/lib/tansaku/cli.rb +9 -0
- data/lib/tansaku/crawler.rb +6 -3
- data/lib/tansaku/version.rb +1 -1
- data/renovate.json +5 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a117357045f40d3f48c578da338c27d4c4e7c5945fb58f9ed5ee1ea5351d7230
|
4
|
+
data.tar.gz: c877f44ac8d888c25ed1c272e461bb53976c75bf191acf96dce0cc82493dd11f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f67a140d2437f7431268890024e67f3bba2e10a5dc7519f6888107fef1c564c1dcc694573d42549ba5f8640a9e08c9fa20ba29024ce3fe167bf6e55d877cc54
|
7
|
+
data.tar.gz: 61785a2947f593c20b8074ad467b1e7326cbea5310fdec0c46a82d1c81139d8f446488766ea3a7f901494ede523c93740a9d67b4eba1b653888edbaf7e7cc2be
|
data/exe/tansaku
CHANGED
data/lib/tansaku/cli.rb
CHANGED
@@ -8,6 +8,7 @@ module Tansaku
|
|
8
8
|
class CLI < Thor
|
9
9
|
desc "crawl URL", "Crawl a given URL"
|
10
10
|
method_option :additional_list, desc: "Path to the file which includes additonal paths to crawl"
|
11
|
+
method_option :headers, type: :hash, default: {}, desc: "Custom headers"
|
11
12
|
method_option :host, type: :string, desc: "Host header to use"
|
12
13
|
method_option :max_concurrent_requests, type: :numeric, desc: "Number of concurrent requests to use"
|
13
14
|
method_option :type, desc: "Type of a list to crawl (admin, backup, database, etc, log or all)", default: "all"
|
@@ -22,5 +23,13 @@ module Tansaku
|
|
22
23
|
puts e
|
23
24
|
end
|
24
25
|
end
|
26
|
+
|
27
|
+
default_command :crawl
|
28
|
+
|
29
|
+
class << self
|
30
|
+
def exit_on_failure?
|
31
|
+
true
|
32
|
+
end
|
33
|
+
end
|
25
34
|
end
|
26
35
|
end
|
data/lib/tansaku/crawler.rb
CHANGED
@@ -17,6 +17,7 @@ module Tansaku
|
|
17
17
|
attr_reader :base_uri
|
18
18
|
|
19
19
|
attr_reader :additional_list
|
20
|
+
attr_reader :headers
|
20
21
|
attr_reader :host
|
21
22
|
attr_reader :max_concurrent_requests
|
22
23
|
attr_reader :type
|
@@ -25,8 +26,9 @@ module Tansaku
|
|
25
26
|
def initialize(
|
26
27
|
base_uri,
|
27
28
|
additional_list: nil,
|
29
|
+
headers: {},
|
28
30
|
host: nil,
|
29
|
-
max_concurrent_requests:
|
31
|
+
max_concurrent_requests: nil,
|
30
32
|
type: "all",
|
31
33
|
user_agent: DEFAULT_USER_AGENT
|
32
34
|
)
|
@@ -38,8 +40,9 @@ module Tansaku
|
|
38
40
|
raise ArgumentError, "Invalid path" unless valid_path?
|
39
41
|
end
|
40
42
|
|
43
|
+
@headers = headers
|
41
44
|
@host = host
|
42
|
-
@max_concurrent_requests = max_concurrent_requests
|
45
|
+
@max_concurrent_requests = max_concurrent_requests || Etc.nprocessors * 8
|
43
46
|
@type = type
|
44
47
|
@user_agent = user_agent
|
45
48
|
end
|
@@ -95,7 +98,7 @@ module Tansaku
|
|
95
98
|
end
|
96
99
|
|
97
100
|
def default_request_headers
|
98
|
-
@default_request_headers ||= { "host" => host, "user-agent" => user_agent }.compact
|
101
|
+
@default_request_headers ||= headers.merge({ "host" => host, "user-agent" => user_agent }.compact)
|
99
102
|
end
|
100
103
|
end
|
101
104
|
end
|
data/lib/tansaku/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tansaku
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/tansaku/monkey_patch.rb
|
167
167
|
- lib/tansaku/path.rb
|
168
168
|
- lib/tansaku/version.rb
|
169
|
+
- renovate.json
|
169
170
|
- tansaku.gemspec
|
170
171
|
homepage: https://github.com/ninoseki/tansaku
|
171
172
|
licenses:
|