maxwell 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4dee2d7fc3af9b8e4dc64b1b6588be1735aa9f35
4
- data.tar.gz: cc1b31de45500795d3630bad15a2b4c0dc0b6117
3
+ metadata.gz: 0b4a07c10e6638f009e5d96fa34d98d2c92985aa
4
+ data.tar.gz: 641ac72166235db4b28e7ccebee77e8da1b7c3e0
5
5
  SHA512:
6
- metadata.gz: ca1d52b57421703c92f3e2697f56dca22765baca6180b4e28da0669474b74d4ad580ff709c8934b3c173f44694d991e760c579f8c2469f1ad0770b839b7caf47
7
- data.tar.gz: 7ad8eee5272c711110a5111a23b7ce450f4f5ceb5a1fe7b5943be57980d5ed1504d09f02707c9474b0c39df269db901e0e86124343ee6bf043339d6f639591b6
6
+ metadata.gz: 04d92bce282bbbf263c83b1434f05cbd9043a169bf893a360a80701755d645aef03c60af4a69187ac763c8a1fa95e1596854d175fd0aaa6d66c787c3eab36081
7
+ data.tar.gz: 8cfb57804bc5575573a30cb10166cf46b8c6761969ca8bbdef1accd758c16d0823a9c6c203103bf45a244303f9c54f8c26ff1f709425d3c944e7ec7767e7819b
data/README.md CHANGED
@@ -21,19 +21,22 @@ Or install it yourself as:
21
21
  ## Usage
22
22
 
23
23
  ```ruby
24
- Maxwell::DO({
25
- "root url" => {
26
- "css selector to links" => {
27
- "css selector to links" => ->(html) {
28
- html.title
29
- # write parser here
30
- }
31
- }
32
- }
33
- }) do |result|
34
- p result
35
- # write code for handling results
24
+ class YahooScraper < Maxwell::Base
25
+ attr_scrape :title, :url, :address
26
+
27
+ regist_strategy("h3.slcHead.cFix a") do
28
+ @title = @html.title
29
+ @url = @html.css("td.sdhk jdj").text
30
+ @address = @html.css("table tr.ddad").text
31
+ end
32
+
33
+ regist_handler do |result|
34
+ p result
35
+ end
36
+
36
37
  end
38
+
39
+ YahooScraper.new.execute("https://www.yahoo.com/")
37
40
  ```
38
41
 
39
42
  ## Development
data/lib/maxwell/converter.rb CHANGED
@@ -1,22 +1,45 @@
1
1
  require 'nokogiri'
2
2
  require 'httpclient'
3
3
 
4
- class Maxwell
5
- module Converter
6
- def self.execute(url)
7
- client = HTTPClient.new(
8
- default_header: {
9
- "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
10
- }
11
- )
4
+ require 'nokogiri'
5
+ require 'capybara'
6
+ require 'capybara/poltergeist'
12
7
 
13
- html = begin
14
- client.get_content(url)
15
- rescue
16
- ""
8
+ module Maxwell
9
+ class Converter
10
+ @user_agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
11
+ class << self
12
+ def call(url, use_poltergeist=false)
13
+ use_poltergeist ? call_with_js(url) : call_without_js(url)
17
14
  end
18
15
 
19
- Nokogiri::HTML(html)
16
+ def call_without_js(url)
17
+ client = HTTPClient.new(
18
+ default_header: {
19
+ "User-Agent" => @user_agent
20
+ }
21
+ )
22
+
23
+ html = begin
24
+ client.get_content(url)
25
+ rescue
26
+ ""
27
+ end
28
+
29
+ Nokogiri::HTML(html)
30
+ end
31
+
32
+ def call_with_js(url)
33
+ Capybara.register_driver :poltergeist do |app|
34
+ Capybara::Poltergeist::Driver.new(app, { js_errors: false, timeout: 1000 })
35
+ end
36
+ Capybara.default_selector = :xpath
37
+ session = Capybara::Session.new(:poltergeist)
38
+
39
+ session.driver.headers = { 'User-Agent' => @user_agent }
40
+ session.visit url
41
+ Nokogiri::HTML(session.html)
42
+ end
20
43
  end
21
44
  end
22
45
  end
@@ -1,3 +1,3 @@
1
- class Maxwell
2
- VERSION = "0.2.0"
1
+ module Maxwell
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/maxwell.rb CHANGED
@@ -1,19 +1,23 @@
1
1
  require "maxwell/converter"
2
2
 
3
- class Maxwell
3
+ module Maxwell
4
4
  class Base
5
5
  class << self
6
6
  def attr_scrape(*attr_scrapes)
7
7
  @acquirer_class = Class.new do
8
8
  attr_accessor *attr_scrapes
9
- @@attributes = attr_scrapes
9
+ @attr_scrapes = attr_scrapes
10
+
11
+ def self.attr_scrapes
12
+ @attr_scrapes
13
+ end
10
14
 
11
15
  def initialize(nokogiri_obj)
12
16
  @html = nokogiri_obj
13
17
  end
14
18
 
15
19
  def result
16
- @@attributes.map { |k| [k, send(k)] }.to_h
20
+ self.class.attr_scrapes.map { |k| [k, send(k)] }.to_h
17
21
  end
18
22
  end
19
23
  end
@@ -26,11 +30,15 @@ class Maxwell
26
30
  def regist_handler(&handler_blk)
27
31
  @handler_blk = handler_blk
28
32
  end
33
+
34
+ def use_poltergeist(value)
35
+ @use_poltergeist = value
36
+ end
29
37
  end
30
38
 
31
39
  def execute(root_url)
32
40
  if self.link_selectore
33
- html = Maxwell::Converter.execute(root_url)
41
+ html = Maxwell::Converter.call(root_url, use_poltergeist)
34
42
  html.css(self.link_selectore).each do |a|
35
43
  execute_for_result a[:href]
36
44
  end
@@ -39,6 +47,10 @@ class Maxwell
39
47
  end
40
48
  end
41
49
 
50
+ def use_poltergeist
51
+ self.class.instance_eval("@use_poltergeist")
52
+ end
53
+
42
54
  def link_selectore
43
55
  self.class.instance_eval("@link_selectore")
44
56
  end
@@ -57,7 +69,7 @@ class Maxwell
57
69
 
58
70
  private
59
71
  def execute_for_result(tip_url)
60
- acquirer = acquirer_class.new(Maxwell::Converter.execute(tip_url))
72
+ acquirer = acquirer_class.new(Maxwell::Converter.call(tip_url, use_poltergeist))
61
73
  acquirer.instance_eval &self.strategy_blk
62
74
 
63
75
  acquirer.result.tap do |result|
data/maxwell.gemspec CHANGED
@@ -21,6 +21,8 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_dependency "nokogiri"
23
23
  spec.add_dependency "httpclient"
24
+ spec.add_dependency 'poltergeist'
25
+ spec.add_dependency 'capybara'
24
26
 
25
27
  spec.add_development_dependency "bundler", "~> 1.10"
26
28
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maxwell
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - gogotanaka
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,34 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: poltergeist
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: capybara
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: bundler
43
71
  requirement: !ruby/object:Gem::Requirement