meta_hari 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bff4312d86ff4bee5be187b150ef238f404369f5
4
+ data.tar.gz: 06b08dbea9f2720e174f67914a208d3c7ae7ede2
5
+ SHA512:
6
+ metadata.gz: 735398073cd3179c1cb9e7319fcf4cc050448ca9d187b7834eb4115f8a2f8dafc5d97ea0ec580d7feec967a35aaa38db8503a677a264cb8392291998133c8a52
7
+ data.tar.gz: 919c189e56767d3ec3f469a9e8083e65d0601634eda0e0c189e0f3a51726f2deaa9921e62268cd9f94dded4db89daf8c554352431c0443a0cbf725ff26ff45b1
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rubocop.yml ADDED
@@ -0,0 +1,10 @@
1
+ AllCops:
2
+ Exclude:
3
+ - 'meta_hari.gemspec'
4
+ - 'Guardfile'
5
+
6
+ Style/Documentation:
7
+ Enabled: false
8
+
9
+ Style/SymbolArray:
10
+ Enabled: true
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in meta_hari.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,135 @@
1
# A sample Guardfile
# More info at https://github.com/guard/guard#readme

## Uncomment and set this to only include directories you want to watch
# directories %w(app lib config test spec features)

## Uncomment to clear the screen before every task
# clearing :on

## Guard internally checks for changes in the Guardfile and exits.
## If you want Guard to automatically start up again, run guard in a
## shell loop, e.g.:
##
##  $ while bundle exec guard; do echo "Restarting Guard..."; done
##
## Note: if you are using the `directories` clause above and you are not
## watching the project directory ('.'), then you will want to move
## the Guardfile to a watched dir and symlink it back, e.g.
#
#  $ mkdir config
#  $ mv Guardfile config/
#  $ ln -s config/Guardfile .
#
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"

# Note: The cmd option is now required due to the increasing number of ways
#       rspec may be run, below are examples of the most common uses.
#  * bundler: 'bundle exec rspec'
#  * bundler binstubs: 'bin/rspec'
#  * spring: 'bin/rspec' (This will use spring if running and you have
#                          installed the spring binstubs per the docs)
#  * zeus: 'zeus rspec' (requires the server to be started separately)
#  * 'just' rspec: 'rspec'

# The rspec guard is declared exactly once. The generated template had been
# appended twice, which duplicated every watcher below.
guard :rspec, cmd: "bundle exec rspec" do
  require "guard/rspec/dsl"
  dsl = Guard::RSpec::Dsl.new(self)

  # Feel free to open issues for suggestions and improvements

  # RSpec files
  rspec = dsl.rspec
  watch(rspec.spec_helper) { rspec.spec_dir }
  watch(rspec.spec_support) { rspec.spec_dir }
  watch(rspec.spec_files)

  # Ruby files
  ruby = dsl.ruby
  dsl.watch_spec_files_for(ruby.lib_files)

  # Rails files
  rails = dsl.rails(view_extensions: %w(erb haml slim))
  dsl.watch_spec_files_for(rails.app_files)
  dsl.watch_spec_files_for(rails.views)

  watch(rails.controllers) do |m|
    [
      rspec.spec.("routing/#{m[1]}_routing"),
      rspec.spec.("controllers/#{m[1]}_controller"),
      rspec.spec.("acceptance/#{m[1]}")
    ]
  end

  # Rails config changes
  watch(rails.spec_helper)     { rspec.spec_dir }
  watch(rails.routes)          { "#{rspec.spec_dir}/routing" }
  watch(rails.app_controller)  { "#{rspec.spec_dir}/controllers" }

  # Capybara features specs
  watch(rails.view_dirs)     { |m| rspec.spec.("features/#{m[1]}") }

  # Turnip features and steps
  watch(%r{^spec/acceptance/(.+)\.feature$})
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
    Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance"
  end
end

# Re-run RuboCop when a Ruby file changes; when a .rubocop.yml changes,
# re-check the directory that contains it.
guard :rubocop do
  watch(%r{.+\.rb$})
  watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Paul Spieker
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # MetaHari
2
+
3
+ Meta Hari retrieves product information from a given product link
4
+ (e.g. from Amazon).
5
+
6
+ The name Meta Hari comes from
7
+ [Mata Hari](https://en.wikipedia.org/wiki/Mata_Hari), one of the most
8
+ popular spies.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'meta_hari'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install meta_hari
25
+
26
+ ## Usage
27
+
28
+ To retrieve product information, just pass the URL containing
29
+ that information to the method `MetaHari.spy`.
30
+
31
+ ```ruby
32
+ product = MetaHari.spy('http://www.amazon.de/Gastroback-42429-Design-Wasserkocher-Advanced/dp/B000LQXC2Q/ref=sr_1_1')
33
+ product.inspect # => #<OpenStruct name="Gastroback 42429 Design Wasserkocher Advanced Pro", image="http://ecx.images-amazon.com/images/I/814Yl6mxLsL._SL1500_.jpg", description="">
34
+ ```
35
+
36
+ ## Implemented spyglasses
37
+
38
+ A spyglass is a support class for a specific shop. In order to support
39
+ custom shops which cannot be spied on by the generic spyglass
40
+ (`MetaHari::Spyglass::Base`), a new spyglass has to be created.
41
+
42
+ * Amazon DE
43
+ * Generic
44
+ * Shops using [JSON-LD](https://developers.google.com/structured-data/rich-snippets/products)
45
+
46
+ ### Creating a spyglass
47
+
48
+ A spyglass has to be a class within the namespace `MetaHari::Spyglass`
49
+ and must extend the class `MetaHari::Spyglass::Base`. The methods
50
+ `self.suitable?` and `spy` should be implemented by the new class.
51
+
52
+ ```ruby
53
+ module MetaHari
54
+ module Spyglass
55
+ class AmazonDe < Base
56
+ def self.suitable?(uri)
57
+ %w(amazon.de www.amazon.de).include? uri.host.downcase
58
+ end
59
+
60
+ def spy
61
+ OpenStruct.new(name: title, image: image, description: '')
62
+ end
63
+
64
+ protected
65
+
66
+ def title
67
+ document.css('#productTitle').text
68
+ end
69
+
70
+ def image
71
+ data = document.css('img#landingImage')
72
+ data &&= data.attr 'data-old-hires'
73
+ data && data.value
74
+ end
75
+ end
76
+ end
77
+ end
78
+ ```
79
+
80
+ ## Contributing
81
+
82
+ 1. Fork it ( https://github.com/spieker/meta_hari/fork )
83
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
84
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
85
+ 4. Push to the branch (`git push origin my-new-feature`)
86
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Registers the RSpec rake task that the `default` and `test` tasks depend on.
RSpec::Core::RakeTask.new

task default: :spec
task test: :spec

desc 'Open an irb session preloaded with this library'
task :console do
  # No `-rubygems` flag: it asks irb to require the `ubygems` shim, which
  # current Ruby versions no longer ship. RubyGems is loaded automatically.
  sh 'irb -I lib -r meta_hari.rb'
end
@@ -0,0 +1,29 @@
1
module MetaHari
  module Helpers
    # Extracts JSON-LD structured data embedded in an HTML document.
    #
    # The document only has to respond to `css(selector)` and return a list
    # of nodes that respond to `text` (a Nokogiri document does).
    class JsonLd
      attr_reader :document

      # @param document [#css] the parsed HTML document to search
      def initialize(document)
        @document = document
      end

      # Returns the JSON-LD data when its `@type` equals the given type.
      #
      # @param type [String] expected value of the `@type` key
      # @return [Hash] the parsed JSON-LD data, or an empty hash when the
      #   document has no usable JSON-LD or its type differs
      def data(type = 'Product')
        found = json
        found['@type'] == type ? found : {}
      end

      protected

      # CSS selector matching JSON-LD script tags.
      def selector
        'script[type="application/ld+json"]'
      end

      # Parsed content of the first JSON-LD script tag. The result is
      # memoized, including the empty hash used when nothing was found.
      #
      # @return [Hash]
      def json
        @json ||= parse(document.css(selector).first)
      end

      private

      # Turns a script node into a hash.
      #
      # The page content is untrusted, so the regular `JSON.parse` (which
      # keeps the nesting limit) is used instead of `JSON.parse!`. Malformed
      # JSON and top-level values other than an object are treated like a
      # missing script tag.
      #
      # @param script [#text, nil] the script node, if any
      # @return [Hash]
      def parse(script)
        return {} unless script
        parsed = JSON.parse(script.text)
        parsed.is_a?(Hash) ? parsed : {}
      rescue JSON::ParserError
        {}
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'meta_hari/helpers/json_ld'
2
+
3
# Namespace for the helper classes required above.
module MetaHari
  module Helpers; end
end
@@ -0,0 +1,25 @@
1
module MetaHari
  module Spyglass
    # Spyglass for product pages hosted on amazon.de.
    class AmazonDe < Base
      # Tells whether this spyglass handles the given URI.
      #
      # @param uri [URI] the product link
      # @return [Boolean] true for amazon.de and www.amazon.de
      def self.suitable?(uri)
        # `host` is nil for relative or scheme-less links; `to_s` turns that
        # into a plain non-match instead of a NoMethodError.
        %w(amazon.de www.amazon.de).include? uri.host.to_s.downcase
      end

      # @return [OpenStruct] product with name, image and an empty
      #   description
      def spy
        OpenStruct.new(name: title, image: image, description: '')
      end

      protected

      # Text of the element with the id `productTitle`.
      def title
        document.css('#productTitle').text
      end

      # Value of the `data-old-hires` attribute of the landing image, or
      # nil when the attribute lookup yields nothing.
      def image
        data = document.css('img#landingImage')
        data &&= data.attr 'data-old-hires'
        data && data.value
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require 'ostruct'
2
+
3
module MetaHari
  module Spyglass
    # Generic spyglass. It downloads the page behind a URI and builds the
    # product from the JSON-LD data found in it.
    class Base
      attr_reader :uri

      # Tells whether a spyglass handles the given URI. The generic
      # spyglass makes no such decision and always raises.
      #
      # @param uri [URI] the product link
      # @raise [StandardError] always
      def self.suitable?(uri)
        fail StandardError, "not implemented for '#{uri.host}'"
      end

      # @param uri [URI] the product link to inspect
      def initialize(uri)
        @uri = uri
      end

      # @return [OpenStruct] the merged data of all sources (currently only
      #   JSON-LD)
      def spy
        attributes = [spy_json_ld].inject({}) { |a, e| a.merge e }
        OpenStruct.new attributes
      end

      protected

      # Browser-like User-Agent header value sent with every request.
      def user_agent
        [
          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5)',
          'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125',
          'Safari/537.36'
        ].join(' ')
      end

      # Builds the GET request for the given target.
      #
      # `request_uri` is the path plus the query string ('/' for an empty
      # path), so query parameters of the product link are kept.
      #
      # @param target [URI::HTTP] the URI to request, defaults to #uri
      # @return [Net::HTTP::Get]
      def fetch_request(target = uri)
        Net::HTTP::Get.new target.request_uri, 'User-Agent' => user_agent
      end

      # Performs the request, using TLS when the target is an https URI.
      #
      # @param target [URI::HTTP] the URI to request, defaults to #uri
      # @return [Net::HTTPResponse]
      def fetch_response(target = uri)
        options = { use_ssl: target.scheme == 'https' }
        Net::HTTP.start target.host, target.port, options do |http|
          http.request fetch_request(target)
        end
      end

      # Downloads the page body, following HTTP redirects. The body is
      # memoized after the first successful download.
      #
      # @param limit [Integer] number of requests that may still be made
      # @param target [URI::HTTP] the URI to request, defaults to #uri
      # @return [String] the response body
      # @raise [ArgumentError] when the redirect limit is used up
      def fetch_data(limit = 10, target = uri)
        return @_data if @_data
        fail ArgumentError, 'HTTP redirect too deep' if limit == 0
        case res = fetch_response(target)
        when Net::HTTPSuccess then @_data = res.body
        when Net::HTTPRedirection
          # The Location header may be relative, so resolve it against the
          # URI that was just requested.
          fetch_data limit - 1, URI.join(target.to_s, res['location'])
        else res.error!
        end
      end

      # The downloaded page parsed as HTML, memoized.
      def document
        @document ||= Nokogiri::HTML fetch_data
      end

      # @return [Hash] the JSON-LD data of the page as returned by the
      #   JSON-LD helper
      def spy_json_ld
        json_ld = MetaHari::Helpers::JsonLd.new(document)
        json_ld.data
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'meta_hari/spyglass/base'
2
+ require 'meta_hari/spyglass/amazon_de'
3
+
4
# Namespace for the spyglass classes required above.
module MetaHari
  module Spyglass; end
end
@@ -0,0 +1,4 @@
1
# MetaHari version
module MetaHari
  # Frozen so the version string cannot be modified in place by accident.
  VERSION = '0.0.1'.freeze
end
data/lib/meta_hari.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'nokogiri'
2
+ require 'json'
3
+ require 'uri'
4
+ require 'net/http'
5
+ require 'meta_hari/version'
6
+ require 'meta_hari/helpers'
7
+ require 'meta_hari/spyglass'
8
+
9
# MetaHari finds product information for a given product link. The
# information is wrapped into an OpenStruct.
#
# Example
# =======
#
# ```ruby
# product = MetaHari.spy('http://example.com/product.html')
# ```
module MetaHari
  class << self
    # Looks up the product behind the given link.
    #
    # @param url [String] the product link
    # @return [Object] whatever the chosen spyglass returns from `spy`
    # @raise [URI::InvalidURIError] when the link cannot be parsed
    def spy(url)
      uri = URI.parse url
      suitable_spyglass_instance(uri).spy
    end

    private

    # Instantiates the spyglass chosen for the URI.
    def suitable_spyglass_instance(uri)
      find_suitable_spyglass(uri).new(uri)
    end

    # Finding a suitable spyglass for the given URL. If no suitable spyglass
    # is found, the default spyglass (MetaHari::Spyglass::Base) is returned.
    #
    def find_suitable_spyglass(uri)
      spyglasses.find { |spyglass| spyglass.suitable? uri } ||
        MetaHari::Spyglass::Base
    end

    # All subclasses of the generic spyglass defined in MetaHari::Spyglass.
    # Constants that are not classes are skipped, because comparing them
    # with `<` against the base class would raise.
    def spyglasses
      base = MetaHari::Spyglass::Base
      MetaHari::Spyglass.constants
        .map { |name| MetaHari::Spyglass.const_get(name) }
        .select { |const| const.is_a?(Class) && const < base }
    end
  end
end
data/meta_hari.gemspec ADDED
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Make the gem's own lib directory loadable so the version file can be
# required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'meta_hari/version'

Gem::Specification.new do |spec|
  spec.name          = 'meta_hari'
  spec.version       = MetaHari::VERSION
  spec.authors       = ['Paul Spieker']
  spec.email         = ['p.spieker@duenos.de']
  spec.summary       = 'Receiving product informations from a given link.'
  spec.homepage      = 'https://github.com/spieker/meta_hari'
  spec.license       = 'MIT'

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # lib/meta_hari.rb requires nokogiri when the gem is loaded, so it has to
  # be a runtime dependency rather than a development dependency.
  spec.add_dependency 'nokogiri', '~> 1.6.6'

  spec.add_development_dependency 'bundler', '~> 1.7'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.2.0'
  spec.add_development_dependency 'guard', '~> 2.12.5'
  spec.add_development_dependency 'guard-rspec', '~> 4.5.0'
  spec.add_development_dependency 'rubocop', '~> 0.31.0'
  spec.add_development_dependency 'guard-rubocop', '~> 1.2.0'
  spec.add_development_dependency 'pry', '~> 0.10.1'
end
@@ -0,0 +1,74 @@
1
+ require 'spec_helper'
2
+
3
describe MetaHari::Helpers::JsonLd do
  # Keys of the product described by the JSON-LD example resource.
  keys = %w(@context @type name image description)

  # A hash carrying every key of the example product.
  shared_examples 'a hash with the JSON-LD keys' do
    it { should be_a Hash }
    keys.each { |key| it { should have_key key } }
  end

  # A hash carrying none of the keys of the example product.
  shared_examples 'a hash without the JSON-LD keys' do
    it { should be_a Hash }
    keys.each { |key| it { should_not have_key key } }
  end

  let(:html) { resource_content('json_ld_example.html') }
  let(:document) { Nokogiri::HTML html }
  subject { described_class.new document }

  describe '#initialize' do
    it 'assigns the document to the reader' do
      expect(subject.document).to be document
    end
  end

  context 'when containing JSON-LD' do
    describe '#json' do
      subject { described_class.new(document).send :json }

      include_examples 'a hash with the JSON-LD keys'

      it 'is a product' do
        expect(subject['@type']).to eql 'Product'
      end
    end

    describe '#data' do
      it 'uses #json' do
        expect(subject).to receive(:json).and_return({})
        subject.data
      end

      context 'when type matches' do
        subject { described_class.new(document).data }

        include_examples 'a hash with the JSON-LD keys'
      end

      context 'when type does not match' do
        subject { described_class.new(document).data('Something else') }

        include_examples 'a hash without the JSON-LD keys'
      end
    end
  end

  context 'when not containing JSON-LD' do
    let(:html) { '' }

    describe '#json' do
      subject { described_class.new(document).send :json }

      include_examples 'a hash without the JSON-LD keys'
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
describe MetaHari::Spyglass::AmazonDe do
  let(:uri) { URI.parse 'http://www.amazon.de/dp/B000LQXC2Q/ref=sr_1_1' }
  let(:html) { '' }
  let(:instance) { described_class.new uri }
  subject { instance }

  # Never hit the network: the page body always comes from `html`.
  before(:each) { allow(instance).to receive(:fetch_data).and_return(html) }

  %w(amazon.de www.amazon.de).each do |host|
    it "is suitable for #{host}" do
      candidate = URI.parse "http://#{host}"
      expect(described_class.suitable?(candidate)).to be true
    end
  end

  context 'with valid amazn product page' do
    let(:html) { resource_content 'amazon_de.html' }

    it 'extracts the correct title' do
      expect(subject.send :title)
        .to eql 'Gastroback 42429 Design Wasserkocher Advanced Pro'
    end

    it 'extracts the correct image' do
      expect(subject.send :image)
        .to eql 'http://ecx.images-amazon.com/images/I/814Yl6mxLsL._SL1500_.jpg'
    end
  end

  describe '#spy' do
    let(:html) { resource_content 'amazon_de.html' }
    subject { instance.spy }

    it { should be_an OpenStruct }
    %i(name image description).each { |attr| it { should respond_to attr } }
  end
end