kimurai 1.3.2 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +7 -6
- data/lib/kimurai/base.rb +5 -1
- data/lib/kimurai/browser_builder.rb +8 -26
- data/lib/kimurai/browser_builder/mechanize_builder.rb +120 -116
- data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +139 -135
- data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +152 -148
- data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +161 -157
- data/lib/kimurai/capybara_ext/session.rb +10 -1
- data/lib/kimurai/capybara_ext/session/config.rb +1 -1
- data/lib/kimurai/cli.rb +5 -1
- data/lib/kimurai/template/Gemfile +1 -1
- data/lib/kimurai/template/spiders/application_spider.rb +6 -0
- data/lib/kimurai/version.rb +1 -1
- metadata +2 -2
@@ -88,7 +88,16 @@ module Capybara
|
|
88
88
|
def current_response(response_type = :html)
|
89
89
|
case response_type
|
90
90
|
when :html
|
91
|
-
|
91
|
+
if config.encoding
|
92
|
+
if config.encoding == :auto
|
93
|
+
charset = body.force_encoding("ISO-8859-1").encode("UTF-8")[/<meta.*?charset=["]?([\w+\d+\-]*)/i, 1]
|
94
|
+
Nokogiri::HTML(body, nil, charset)
|
95
|
+
else
|
96
|
+
Nokogiri::HTML(body, nil, config.encoding)
|
97
|
+
end
|
98
|
+
else
|
99
|
+
Nokogiri::HTML(body)
|
100
|
+
end
|
92
101
|
when :json
|
93
102
|
JSON.parse(body)
|
94
103
|
end
|
data/lib/kimurai/cli.rb
CHANGED
@@ -116,7 +116,11 @@ module Kimurai
|
|
116
116
|
end
|
117
117
|
|
118
118
|
engine = options["engine"]&.delete(":")&.to_sym
|
119
|
-
|
119
|
+
if url = options["url"]
|
120
|
+
klass.new(engine).request_to(:console, url: options["url"])
|
121
|
+
else
|
122
|
+
klass.new(engine).public_send(:console)
|
123
|
+
end
|
120
124
|
end
|
121
125
|
|
122
126
|
desc "list", "List all available spiders in the current project"
|
@@ -100,6 +100,12 @@ class ApplicationSpider < Kimurai::Base
|
|
100
100
|
# Format: same as for the `skip_request_errors` option.
|
101
101
|
# retry_request_errors: [Net::ReadTimeout],
|
102
102
|
|
103
|
+
# Handle page encoding while parsing html response using Nokogiri. There are two modes:
|
104
|
+
# Auto (`:auto`): try to detect the correct encoding from the <meta http-equiv="Content-Type"> or <meta charset> tags.
|
105
|
+
# Manual: set the required encoding explicitly, for example `encoding: "GB2312"`.
|
106
|
+
# By default this option is unset.
|
107
|
+
# encoding: nil,
|
108
|
+
|
103
109
|
# Restart browser if one of the options is true:
|
104
110
|
restart_if: {
|
105
111
|
# Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
|
data/lib/kimurai/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kimurai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Afanasev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|