amazon_dp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in amazon_dp.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 kimoto
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # AmazonDP
2
+
3
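+ Amazon Description of Product (`/dp/`) page parser. It fetches a product page by URL or ASIN code and extracts the adult/Kindle flags, Kindle price, page count, star rating, review count, and like count.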
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'amazon_dp'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install amazon_dp
18
+
19
+ ## Usage
20
+
21
+ #1 Fetch and parse a product page by URL
22
+ require 'amazon_dp'
23
+ page_info = AmazonDP.get("http://www.amazon.co.jp/dp/ASIN_CODE")
24
+
25
+ #2 Fetch and parse a product page by ASIN code
26
+ require 'amazon_dp'
27
+ page_info = AmazonDP.get("ASIN_CODE")
28
+
29
+ #3 Fetch and parse in separate steps, reusing the fetcher and parser
30
+ require 'amazon_dp'
31
+ f = AmazonDP::Fetcher.new
32
+ f.adult_auth
33
+ html = f.fetch("URL_OR_ASIN_CODE")
34
+ page_info = AmazonDP::Parser.new.parse_html(html)
35
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/amazon_dp.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/amazon_dp/version', __FILE__)
3
+
4
# Gem specification for amazon_dp.
Gem::Specification.new do |gem|
  gem.name          = "amazon_dp"
  gem.version       = AmazonDP::VERSION
  gem.authors       = ["kimoto"]
  gem.email         = ["sub+peerler@gmail.com"]
  gem.description   = %q{Amazon Description of Product page parser}
  gem.summary       = %q{Amazon Description of Product page parser}
  gem.homepage      = ""
  # The bundled LICENSE file is the MIT license.
  gem.licenses      = ["MIT"]

  # Split on newlines only: `$\` (output record separator) is nil by default,
  # which makes String#split break on any whitespace and mangles file names
  # that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # lib/amazon_dp/fetcher.rb and lib/amazon_dp/parser.rb require these at
  # load time, so they must be installed together with the gem.
  gem.add_runtime_dependency "nokogiri"
  gem.add_runtime_dependency "mechanize"
end
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby
# encoding: utf-8
# Author: kimoto
#
# Example: authenticate once, then fetch and parse several product pages
# with a shared fetcher and parser, printing each parsed result.
require 'amazon_dp'

# Send the library's progress messages to standard error.
AmazonDP::Fetcher.logger = Logger.new(STDERR)
AmazonDP::Parser.logger = Logger.new(STDERR)

fetcher = AmazonDP::Fetcher.new
fetcher.adult_auth

parser = AmazonDP::Parser.new

# Targets may be full product URLs or bare ASIN codes.
targets = [
  "http://www.amazon.co.jp/dp/4894714078",
  "http://www.amazon.co.jp/dp/B00A40XOU4/",
  "B00A3EXJP6"
]

targets.each do |url_or_asin|
  page_data = fetcher.fetch(url_or_asin)
  p parser.parse_html(page_data)
end
23
+
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby
# encoding: utf-8
# Author: kimoto
#
# Example: fetch and parse a single product by ASIN code in one call.
require 'amazon_dp'

p AmazonDP.get("B00A3EXJP6")
8
+
@@ -0,0 +1,56 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+ # Author: kimoto
4
+ require 'nokogiri'
5
+ require 'mechanize'
6
+ require 'logger'
7
+
8
module AmazonDP
  # Downloads Amazon product pages through a Mechanize agent.
  #
  # A target can be given either as a full http(s) URL or as a bare ASIN
  # code; ASIN codes are expanded through the permalink URL template.
  class Fetcher
    # URL requested by #adult_auth.
    ADULT_AUTH_URL = "http://www.amazon.co.jp/gp/product/black-curtain-redirect.html"
    # Permalink template; the "[ASIN_CODE]" placeholder is replaced by the ASIN.
    PERMALINK_URL = "http://www.amazon.co.jp/gp/product/[ASIN_CODE]?ie=UTF8&redirect=true"
    # Default User-Agent string for the agent.
    USER_AGENT = "w3m/0.5.2"

    # Logger shared by all instances; replace it via Fetcher.logger=.
    @@logger = Logger.new(nil)

    # Replaces the logger used by every Fetcher.
    def self.logger=(logger)
      @@logger = logger
    end

    # opts may override the defaults with :user_agent, :adult_auth_url and
    # :permalink_url.
    def initialize(opts={})
      @user_agent     = opts[:user_agent] || USER_AGENT
      @adult_auth_url = opts[:adult_auth_url] || ADULT_AUTH_URL
      @permalink_url  = opts[:permalink_url] || PERMALINK_URL
      @agent = Mechanize.new { |agent| agent.user_agent = @user_agent }
    end

    # Requests the adult-auth URL with redirect following disabled.
    # NOTE(review): presumably this makes the agent pick up the
    # age-confirmation cookie -- confirm against the site's behavior.
    def adult_auth
      @@logger.info "try adult authentication"
      @agent.redirect_ok = false
      begin
        @agent.get(@adult_auth_url)
      ensure
        # Always restore redirect following, even when the request raises,
        # so later fetches are not left with redirects disabled.
        @agent.redirect_ok = true
      end
      @@logger.info "adult authentication done"
    end

    # Fetches a page and returns its body.
    #
    # The single argument is either an http(s) URL, fetched as-is, or an
    # ASIN code, fetched through the permalink template.
    def fetch(*params)
      # Inspect the first argument: comparing the whole params array with a
      # Regexp is never true and would treat URLs as ASIN codes.
      if params.first.to_s =~ %r{\Ahttps?://}i
        fetch_url(*params)
      else
        fetch_asin_code(*params)
      end
    end

    private

    # Expands the permalink template with the ASIN code and fetches it.
    def fetch_asin_code(asin_code)
      fetch_url @permalink_url.gsub("[ASIN_CODE]", asin_code)
    end

    # Performs the GET request and returns the response body.
    def fetch_url(url)
      @@logger.info "try fetch information: #{url}"
      page = @agent.get(url)
      @@logger.info "fetched"
      page.body
    end
  end
end
@@ -0,0 +1,95 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'nokogiri'
4
+ require 'logger'
5
+
6
module AmazonDP
  # Base class for every error raised by this library.
  class AmazonDPError < StandardError; end

  # Plain value holder for the fields extracted from a product page.
  class PageInfo
    attr_accessor :is_kindle
    attr_accessor :is_adult
    attr_accessor :kindle_price
    attr_accessor :kindle_pages
    attr_accessor :stars
    attr_accessor :reviews
    attr_accessor :iine

    # Assigns each key/value pair of opts through the matching public
    # writer; an unknown key raises NoMethodError.
    def initialize(opts={})
      opts.each do |name, value|
        public_send("#{name}=", value)
      end
    end
  end

  # Extracts a PageInfo from the HTML of a product page.
  class Parser
    # Raised for pages that cannot be parsed.
    class ParseError < AmazonDPError; end
    # Raised when the HTML is the adult notice page, not a product page.
    class Amazon18xError < ParseError; end

    # Logger shared by all instances; replace it via Parser.logger=.
    @@logger = Logger.new(nil)

    # Replaces the logger used by every Parser.
    def self.logger=(logger)
      @@logger = logger
    end

    # True when the document contains a "span.alert" element.
    def is_adult_notice_page?(doc)
      !doc.search("span.alert").empty?
    end

    # True when the first "span.highlight" ends with the adult marker.
    # Any lookup failure (e.g. the element is missing) counts as false.
    def is_adult_product_page?(doc)
      last_child_text(doc, "span.highlight") == "[アダルト]"
    rescue
      false
    end

    # True when the first product-detail list item ends with the Kindle
    # edition marker. Any lookup failure counts as false.
    def is_kindle_product_page?(doc)
      last_child_text(doc, "table > tr > td > div.content > ul > li") == "Kindle版"
    rescue
      false
    end

    # Returns the trailing number of the ".priceLarge" text as a Float
    # (0.0 when there is none). Thousands separators are removed first so
    # that "1,680" is read as 1680.0 rather than 680.0.
    def extract_kindle_price(doc)
      price_text = doc.search(".priceLarge").text.strip.delete(",")
      price_text[/[\d.]+$/].to_f
    end

    # Returns the leading number of the page-count text as an Integer
    # (0 when there is none), ignoring thousands separators.
    def extract_kindle_pages(doc)
      pages_text = doc.search("li.listItem > a#pageCountAvailable > span").text.strip.delete(",")
      pages_text[/^[\d.]+/].to_i
    end

    # Returns the trailing number of the star sprite's title attribute as
    # a Float, or nil when the element cannot be read.
    def extract_stars(doc)
      sprite = doc.search("span.crAvgStars > span.asinReviewsSummary > a > span.swSprite")[0]
      sprite.attributes["title"].value[/[\d.]+$/].to_f
    rescue
      nil
    end

    # Returns the leading number of the review-count text as an Integer,
    # ignoring thousands separators, or nil when the lookup fails.
    def extract_reviews(doc)
      reviews_text = doc.search("div.tiny > b").children.first.to_s.delete(",")
      reviews_text[/^[\d.]+/].to_i
    rescue
      nil
    end

    # Returns the like count as an Integer, ignoring thousands separators,
    # or nil when the lookup fails.
    def extract_iine(doc)
      doc.search("span.amazonLikeCountContainer > span").children.first.to_s.delete(",").to_i
    rescue
      nil
    end

    # Parses cp932-encoded HTML and returns a PageInfo.
    #
    # Raises Amazon18xError when the HTML is the adult notice page. The
    # Kindle price and page count are nil for non-Kindle pages.
    def parse_html(html_data)
      @@logger.info "try to parse html data"
      # String#encode returns a new string; keep the result, otherwise the
      # transcoding has no effect on what gets parsed.
      utf8_html = html_data.encode("UTF-8", "cp932")
      # State the encoding explicitly, since the page's own charset
      # declaration no longer describes the transcoded bytes.
      doc = Nokogiri::HTML(utf8_html, nil, "UTF-8")
      if is_adult_notice_page?(doc)
        @@logger.info "this page is adult amazon page"
        raise Amazon18xError
      end

      kindle_flag = is_kindle_product_page?(doc)
      PageInfo.new(
        :is_adult => is_adult_product_page?(doc),
        :is_kindle => kindle_flag,
        :kindle_price => (kindle_flag ? extract_kindle_price(doc) : nil),
        :kindle_pages => (kindle_flag ? extract_kindle_pages(doc) : nil),
        :stars => extract_stars(doc),
        :reviews => extract_reviews(doc),
        :iine => extract_iine(doc)
      )
    end

    private

    # Stripped text of the last child of the first element matching
    # selector; raises when there is no such element.
    def last_child_text(doc, selector)
      doc.search(selector).first.children.last.text.strip
    end
  end
end
@@ -0,0 +1,10 @@
1
+ #!/bin/env ruby
2
+
3
module AmazonDP
  # One-call convenience wrapper: creates a Fetcher, runs its adult
  # authentication, fetches the given URL or ASIN code, and returns the
  # result of parsing the fetched HTML with a new Parser.
  def self.get(url_or_asin)
    fetcher = Fetcher.new
    fetcher.adult_auth
    html = fetcher.fetch(url_or_asin)
    Parser.new.parse_html(html)
  end
end
10
+
@@ -0,0 +1,3 @@
1
module AmazonDP
  # Release version of the gem; the gemspec reads it as the gem version.
  VERSION = "0.0.1"
end
data/lib/amazon_dp.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "amazon_dp/version"
2
+ require 'amazon_dp/fetcher'
3
+ require 'amazon_dp/parser'
4
+ require 'amazon_dp/utils'
5
+
6
# Top-level namespace of the gem. It is intentionally empty here; its
# members are defined by the files required above.
module AmazonDP; end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: amazon_dp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - kimoto
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-05 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Amazon Description of Product page parser
15
+ email:
16
+ - sub+peerler@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - LICENSE
24
+ - README.md
25
+ - Rakefile
26
+ - amazon_dp.gemspec
27
+ - examples/fetch_and_parse.rb
28
+ - examples/quick_fetch.rb
29
+ - lib/amazon_dp.rb
30
+ - lib/amazon_dp/fetcher.rb
31
+ - lib/amazon_dp/parser.rb
32
+ - lib/amazon_dp/utils.rb
33
+ - lib/amazon_dp/version.rb
34
+ homepage: ''
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Amazon Description of Product page parser
58
+ test_files: []
59
+ has_rdoc: