maxwell 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4dee2d7fc3af9b8e4dc64b1b6588be1735aa9f35
4
- data.tar.gz: cc1b31de45500795d3630bad15a2b4c0dc0b6117
3
+ metadata.gz: 0b4a07c10e6638f009e5d96fa34d98d2c92985aa
4
+ data.tar.gz: 641ac72166235db4b28e7ccebee77e8da1b7c3e0
5
5
  SHA512:
6
- metadata.gz: ca1d52b57421703c92f3e2697f56dca22765baca6180b4e28da0669474b74d4ad580ff709c8934b3c173f44694d991e760c579f8c2469f1ad0770b839b7caf47
7
- data.tar.gz: 7ad8eee5272c711110a5111a23b7ce450f4f5ceb5a1fe7b5943be57980d5ed1504d09f02707c9474b0c39df269db901e0e86124343ee6bf043339d6f639591b6
6
+ metadata.gz: 04d92bce282bbbf263c83b1434f05cbd9043a169bf893a360a80701755d645aef03c60af4a69187ac763c8a1fa95e1596854d175fd0aaa6d66c787c3eab36081
7
+ data.tar.gz: 8cfb57804bc5575573a30cb10166cf46b8c6761969ca8bbdef1accd758c16d0823a9c6c203103bf45a244303f9c54f8c26ff1f709425d3c944e7ec7767e7819b
data/README.md CHANGED
@@ -21,19 +21,22 @@ Or install it yourself as:
21
21
  ## Usage
22
22
 
23
23
  ```ruby
24
- Maxwell::DO({
25
- "root url" => {
26
- "css selector to links" => {
27
- "css selector to links" => ->(html) {
28
- html.title
29
- # write parser here
30
- }
31
- }
32
- }
33
- }) do |result|
34
- p result
35
- # write code for handling results
24
+ class YahooScraper < Maxwell::Base
25
+ attr_scrape :title, :url, :address
26
+
27
+ regist_strategy("h3.slcHead.cFix a") do
28
+ @title = @html.title
29
+ @url = @html.css("td.sdhk jdj").text
30
+ @address = @html.css("table tr.ddad").text
31
+ end
32
+
33
+ regist_handler do |result|
34
+ p result
35
+ end
36
+
36
37
  end
38
+
39
+ YahooScraper.new.execute("https://www.yahoo.com/")
37
40
  ```
38
41
 
39
42
  ## Development
data/lib/maxwell/converter.rb CHANGED
@@ -1,22 +1,45 @@
1
1
  require 'nokogiri'
2
2
  require 'httpclient'
3
3
 
4
- class Maxwell
5
- module Converter
6
- def self.execute(url)
7
- client = HTTPClient.new(
8
- default_header: {
9
- "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
10
- }
11
- )
4
+ require 'nokogiri'
5
+ require 'capybara'
6
+ require 'capybara/poltergeist'
12
7
 
13
- html = begin
14
- client.get_content(url)
15
- rescue
16
- ""
8
+ module Maxwell
9
+ class Converter
10
+ @user_agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
11
+ class << self
12
+ def call(url, use_poltergeist=false)
13
+ use_poltergeist ? call_with_js(url) : call_without_js(url)
17
14
  end
18
15
 
19
- Nokogiri::HTML(html)
16
+ def call_without_js(url)
17
+ client = HTTPClient.new(
18
+ default_header: {
19
+ "User-Agent" => @user_agent
20
+ }
21
+ )
22
+
23
+ html = begin
24
+ client.get_content(url)
25
+ rescue
26
+ ""
27
+ end
28
+
29
+ Nokogiri::HTML(html)
30
+ end
31
+
32
+ def call_with_js(url)
33
+ Capybara.register_driver :poltergeist do |app|
34
+ Capybara::Poltergeist::Driver.new(app, { js_errors: false, timeout: 1000 })
35
+ end
36
+ Capybara.default_selector = :xpath
37
+ session = Capybara::Session.new(:poltergeist)
38
+
39
+ session.driver.headers = { 'User-Agent' => @user_agent }
40
+ session.visit url
41
+ Nokogiri::HTML(session.html)
42
+ end
20
43
  end
21
44
  end
22
45
  end
data/lib/maxwell/version.rb CHANGED
@@ -1,3 +1,3 @@
1
- class Maxwell
2
- VERSION = "0.2.0"
1
+ module Maxwell
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/maxwell.rb CHANGED
@@ -1,19 +1,23 @@
1
1
  require "maxwell/converter"
2
2
 
3
- class Maxwell
3
+ module Maxwell
4
4
  class Base
5
5
  class << self
6
6
  def attr_scrape(*attr_scrapes)
7
7
  @acquirer_class = Class.new do
8
8
  attr_accessor *attr_scrapes
9
- @@attributes = attr_scrapes
9
+ @attr_scrapes = attr_scrapes
10
+
11
+ def self.attr_scrapes
12
+ @attr_scrapes
13
+ end
10
14
 
11
15
  def initialize(nokogiri_obj)
12
16
  @html = nokogiri_obj
13
17
  end
14
18
 
15
19
  def result
16
- @@attributes.map { |k| [k, send(k)] }.to_h
20
+ self.class.attr_scrapes.map { |k| [k, send(k)] }.to_h
17
21
  end
18
22
  end
19
23
  end
@@ -26,11 +30,15 @@ class Maxwell
26
30
  def regist_handler(&handler_blk)
27
31
  @handler_blk = handler_blk
28
32
  end
33
+
34
+ def use_poltergeist(value)
35
+ @use_poltergeist = value
36
+ end
29
37
  end
30
38
 
31
39
  def execute(root_url)
32
40
  if self.link_selectore
33
- html = Maxwell::Converter.execute(root_url)
41
+ html = Maxwell::Converter.call(root_url, use_poltergeist)
34
42
  html.css(self.link_selectore).each do |a|
35
43
  execute_for_result a[:href]
36
44
  end
@@ -39,6 +47,10 @@ class Maxwell
39
47
  end
40
48
  end
41
49
 
50
+ def use_poltergeist
51
+ self.class.instance_eval("@use_poltergeist")
52
+ end
53
+
42
54
  def link_selectore
43
55
  self.class.instance_eval("@link_selectore")
44
56
  end
@@ -57,7 +69,7 @@ class Maxwell
57
69
 
58
70
  private
59
71
  def execute_for_result(tip_url)
60
- acquirer = acquirer_class.new(Maxwell::Converter.execute(tip_url))
72
+ acquirer = acquirer_class.new(Maxwell::Converter.call(tip_url, use_poltergeist))
61
73
  acquirer.instance_eval &self.strategy_blk
62
74
 
63
75
  acquirer.result.tap do |result|
data/maxwell.gemspec CHANGED
@@ -21,6 +21,8 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_dependency "nokogiri"
23
23
  spec.add_dependency "httpclient"
24
+ spec.add_dependency 'poltergeist'
25
+ spec.add_dependency 'capybara'
24
26
 
25
27
  spec.add_development_dependency "bundler", "~> 1.10"
26
28
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maxwell
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - gogotanaka
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,34 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: poltergeist
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: capybara
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: bundler
43
71
  requirement: !ruby/object:Gem::Requirement