tansaku 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/tansaku +2 -1
- data/lib/tansaku/cli.rb +9 -0
- data/lib/tansaku/crawler.rb +6 -3
- data/lib/tansaku/version.rb +1 -1
- data/renovate.json +5 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a117357045f40d3f48c578da338c27d4c4e7c5945fb58f9ed5ee1ea5351d7230
|
4
|
+
data.tar.gz: c877f44ac8d888c25ed1c272e461bb53976c75bf191acf96dce0cc82493dd11f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f67a140d2437f7431268890024e67f3bba2e10a5dc7519f6888107fef1c564c1dcc694573d42549ba5f8640a9e08c9fa20ba29024ce3fe167bf6e55d877cc54
|
7
|
+
data.tar.gz: 61785a2947f593c20b8074ad467b1e7326cbea5310fdec0c46a82d1c81139d8f446488766ea3a7f901494ede523c93740a9d67b4eba1b653888edbaf7e7cc2be
|
data/exe/tansaku
CHANGED
data/lib/tansaku/cli.rb
CHANGED
@@ -8,6 +8,7 @@ module Tansaku
|
|
8
8
|
class CLI < Thor
|
9
9
|
desc "crawl URL", "Crawl a given URL"
|
10
10
|
method_option :additional_list, desc: "Path to the file which includes additonal paths to crawl"
|
11
|
+
method_option :headers, type: :hash, default: {}, desc: "Custom headers"
|
11
12
|
method_option :host, type: :string, desc: "Host header to use"
|
12
13
|
method_option :max_concurrent_requests, type: :numeric, desc: "Number of concurrent requests to use"
|
13
14
|
method_option :type, desc: "Type of a list to crawl (admin, backup, database, etc, log or all)", default: "all"
|
@@ -22,5 +23,13 @@ module Tansaku
|
|
22
23
|
puts e
|
23
24
|
end
|
24
25
|
end
|
26
|
+
|
27
|
+
default_command :crawl
|
28
|
+
|
29
|
+
class << self
|
30
|
+
def exit_on_failure?
|
31
|
+
true
|
32
|
+
end
|
33
|
+
end
|
25
34
|
end
|
26
35
|
end
|
data/lib/tansaku/crawler.rb
CHANGED
@@ -17,6 +17,7 @@ module Tansaku
|
|
17
17
|
attr_reader :base_uri
|
18
18
|
|
19
19
|
attr_reader :additional_list
|
20
|
+
attr_reader :headers
|
20
21
|
attr_reader :host
|
21
22
|
attr_reader :max_concurrent_requests
|
22
23
|
attr_reader :type
|
@@ -25,8 +26,9 @@ module Tansaku
|
|
25
26
|
def initialize(
|
26
27
|
base_uri,
|
27
28
|
additional_list: nil,
|
29
|
+
headers: {},
|
28
30
|
host: nil,
|
29
|
-
max_concurrent_requests:
|
31
|
+
max_concurrent_requests: nil,
|
30
32
|
type: "all",
|
31
33
|
user_agent: DEFAULT_USER_AGENT
|
32
34
|
)
|
@@ -38,8 +40,9 @@ module Tansaku
|
|
38
40
|
raise ArgumentError, "Invalid path" unless valid_path?
|
39
41
|
end
|
40
42
|
|
43
|
+
@headers = headers
|
41
44
|
@host = host
|
42
|
-
@max_concurrent_requests = max_concurrent_requests
|
45
|
+
@max_concurrent_requests = max_concurrent_requests || Etc.nprocessors * 8
|
43
46
|
@type = type
|
44
47
|
@user_agent = user_agent
|
45
48
|
end
|
@@ -95,7 +98,7 @@ module Tansaku
|
|
95
98
|
end
|
96
99
|
|
97
100
|
def default_request_headers
|
98
|
-
@default_request_headers ||= { "host" => host, "user-agent" => user_agent }.compact
|
101
|
+
@default_request_headers ||= headers.merge({ "host" => host, "user-agent" => user_agent }.compact)
|
99
102
|
end
|
100
103
|
end
|
101
104
|
end
|
data/lib/tansaku/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tansaku
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/tansaku/monkey_patch.rb
|
167
167
|
- lib/tansaku/path.rb
|
168
168
|
- lib/tansaku/version.rb
|
169
|
+
- renovate.json
|
169
170
|
- tansaku.gemspec
|
170
171
|
homepage: https://github.com/ninoseki/tansaku
|
171
172
|
licenses:
|