kimurai 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,7 +88,16 @@ module Capybara
88
88
  def current_response(response_type = :html)
89
89
  case response_type
90
90
  when :html
91
- Nokogiri::HTML(body)
91
+ if config.encoding
92
+ if config.encoding == :auto
93
+ charset = body.force_encoding("ISO-8859-1").encode("UTF-8")[/<meta.*?charset=["]?([\w+\d+\-]*)/i, 1]
94
+ Nokogiri::HTML(body, nil, charset)
95
+ else
96
+ Nokogiri::HTML(body, nil, config.encoding)
97
+ end
98
+ else
99
+ Nokogiri::HTML(body)
100
+ end
92
101
  when :json
93
102
  JSON.parse(body)
94
103
  end
@@ -1,6 +1,6 @@
1
1
  module Capybara
2
2
  class SessionConfig
3
- attr_accessor :cookies, :proxy, :user_agent
3
+ attr_accessor :cookies, :proxy, :user_agent, :encoding
4
4
  attr_writer :retry_request_errors, :skip_request_errors
5
5
 
6
6
  def retry_request_errors
@@ -116,7 +116,11 @@ module Kimurai
116
116
  end
117
117
 
118
118
  engine = options["engine"]&.delete(":")&.to_sym
119
- klass.parse!(:console, engine, url: options["url"])
119
+ if url = options["url"]
120
+ klass.new(engine).request_to(:console, url: options["url"])
121
+ else
122
+ klass.new(engine).public_send(:console)
123
+ end
120
124
  end
121
125
 
122
126
  desc "list", "List all available spiders in the current project"
@@ -4,7 +4,7 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
4
4
  ruby '>= 2.5'
5
5
 
6
6
  # Framework
7
- gem 'kimurai', '~> 1.0'
7
+ gem 'kimurai', '~> 1.4'
8
8
 
9
9
  # Require files in directory and child directories recursively
10
10
  gem 'require_all'
@@ -100,6 +100,12 @@ class ApplicationSpider < Kimurai::Base
100
100
  # Format: same as for the `skip_request_errors` option.
101
101
  # retry_request_errors: [Net::ReadTimeout],
102
102
 
103
+ # Handle page encoding when parsing an HTML response with Nokogiri. There are two modes:
104
+ # Auto (`:auto`): try to detect the correct encoding from the <meta http-equiv="Content-Type"> or <meta charset> tags.
105
+ # Manual: set the required encoding explicitly, for example `encoding: "GB2312"`.
106
+ # By default this option is unset.
107
+ # encoding: nil,
108
+
103
109
  # Restart browser if one of the options is true:
104
110
  restart_if: {
105
111
  # Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
@@ -1,3 +1,3 @@
1
1
  module Kimurai
2
- VERSION = "1.3.2"
2
+ VERSION = "1.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kimurai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Afanasev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-08 00:00:00.000000000 Z
11
+ date: 2019-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor