meta_hari 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bff4312d86ff4bee5be187b150ef238f404369f5
4
+ data.tar.gz: 06b08dbea9f2720e174f67914a208d3c7ae7ede2
5
+ SHA512:
6
+ metadata.gz: 735398073cd3179c1cb9e7319fcf4cc050448ca9d187b7834eb4115f8a2f8dafc5d97ea0ec580d7feec967a35aaa38db8503a677a264cb8392291998133c8a52
7
+ data.tar.gz: 919c189e56767d3ec3f469a9e8083e65d0601634eda0e0c189e0f3a51726f2deaa9921e62268cd9f94dded4db89daf8c554352431c0443a0cbf725ff26ff45b1
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rubocop.yml ADDED
@@ -0,0 +1,10 @@
1
+ AllCops:
2
+ Exclude:
3
+ - 'meta_hari.gemspec'
4
+ - 'Guardfile'
5
+
6
+ Style/Documentation:
7
+ Enabled: false
8
+
9
+ Style/SymbolArray:
10
+ Enabled: true
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in meta_hari.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,135 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features)
6
+
7
+ ## Uncomment to clear the screen before every task
8
+ # clearing :on
9
+
10
+ ## Guard internally checks for changes in the Guardfile and exits.
11
+ ## If you want Guard to automatically start up again, run guard in a
12
+ ## shell loop, e.g.:
13
+ ##
14
+ ## $ while bundle exec guard; do echo "Restarting Guard..."; done
15
+ ##
16
+ ## Note: if you are using the `directories` clause above and you are not
17
+ ## watching the project directory ('.'), then you will want to move
18
+ ## the Guardfile to a watched dir and symlink it back, e.g.
19
+ #
20
+ # $ mkdir config
21
+ # $ mv Guardfile config/
22
+ # $ ln -s config/Guardfile .
23
+ #
24
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
25
+
26
+ # Note: The cmd option is now required due to the increasing number of ways
27
+ # rspec may be run, below are examples of the most common uses.
28
+ # * bundler: 'bundle exec rspec'
29
+ # * bundler binstubs: 'bin/rspec'
30
+ # * spring: 'bin/rspec' (This will use spring if running and you have
31
+ # installed the spring binstubs per the docs)
32
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
33
+ # * 'just' rspec: 'rspec'
34
+
35
+ guard :rspec, cmd: "bundle exec rspec" do
36
+ require "guard/rspec/dsl"
37
+ dsl = Guard::RSpec::Dsl.new(self)
38
+
39
+ # Feel free to open issues for suggestions and improvements
40
+
41
+ # RSpec files
42
+ rspec = dsl.rspec
43
+ watch(rspec.spec_helper) { rspec.spec_dir }
44
+ watch(rspec.spec_support) { rspec.spec_dir }
45
+ watch(rspec.spec_files)
46
+
47
+ # Ruby files
48
+ ruby = dsl.ruby
49
+ dsl.watch_spec_files_for(ruby.lib_files)
50
+
51
+ # Rails files
52
+ rails = dsl.rails(view_extensions: %w(erb haml slim))
53
+ dsl.watch_spec_files_for(rails.app_files)
54
+ dsl.watch_spec_files_for(rails.views)
55
+
56
+ watch(rails.controllers) do |m|
57
+ [
58
+ rspec.spec.("routing/#{m[1]}_routing"),
59
+ rspec.spec.("controllers/#{m[1]}_controller"),
60
+ rspec.spec.("acceptance/#{m[1]}")
61
+ ]
62
+ end
63
+
64
+ # Rails config changes
65
+ watch(rails.spec_helper) { rspec.spec_dir }
66
+ watch(rails.routes) { "#{rspec.spec_dir}/routing" }
67
+ watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" }
68
+
69
+ # Capybara features specs
70
+ watch(rails.view_dirs) { |m| rspec.spec.("features/#{m[1]}") }
71
+
72
+ # Turnip features and steps
73
+ watch(%r{^spec/acceptance/(.+)\.feature$})
74
+ watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
75
+ Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance"
76
+ end
77
+ end
78
+
79
+ # Note: The cmd option is now required due to the increasing number of ways
80
+ # rspec may be run, below are examples of the most common uses.
81
+ # * bundler: 'bundle exec rspec'
82
+ # * bundler binstubs: 'bin/rspec'
83
+ # * spring: 'bin/rspec' (This will use spring if running and you have
84
+ # installed the spring binstubs per the docs)
85
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
86
+ # * 'just' rspec: 'rspec'
87
+
88
+ guard :rspec, cmd: "bundle exec rspec" do
89
+ require "guard/rspec/dsl"
90
+ dsl = Guard::RSpec::Dsl.new(self)
91
+
92
+ # Feel free to open issues for suggestions and improvements
93
+
94
+ # RSpec files
95
+ rspec = dsl.rspec
96
+ watch(rspec.spec_helper) { rspec.spec_dir }
97
+ watch(rspec.spec_support) { rspec.spec_dir }
98
+ watch(rspec.spec_files)
99
+
100
+ # Ruby files
101
+ ruby = dsl.ruby
102
+ dsl.watch_spec_files_for(ruby.lib_files)
103
+
104
+ # Rails files
105
+ rails = dsl.rails(view_extensions: %w(erb haml slim))
106
+ dsl.watch_spec_files_for(rails.app_files)
107
+ dsl.watch_spec_files_for(rails.views)
108
+
109
+ watch(rails.controllers) do |m|
110
+ [
111
+ rspec.spec.("routing/#{m[1]}_routing"),
112
+ rspec.spec.("controllers/#{m[1]}_controller"),
113
+ rspec.spec.("acceptance/#{m[1]}")
114
+ ]
115
+ end
116
+
117
+ # Rails config changes
118
+ watch(rails.spec_helper) { rspec.spec_dir }
119
+ watch(rails.routes) { "#{rspec.spec_dir}/routing" }
120
+ watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" }
121
+
122
+ # Capybara features specs
123
+ watch(rails.view_dirs) { |m| rspec.spec.("features/#{m[1]}") }
124
+
125
+ # Turnip features and steps
126
+ watch(%r{^spec/acceptance/(.+)\.feature$})
127
+ watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
128
+ Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance"
129
+ end
130
+ end
131
+
132
+ guard :rubocop do
133
+ watch(%r{.+\.rb$})
134
+ watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
135
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Paul Spieker
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # MetaHari
2
+
3
+ Meta Hari retrieves product information from a given product link
4
+ (e.g. from Amazon).
5
+
6
+ The name Meta Hari comes from
7
+ [Mata Hari](https://en.wikipedia.org/wiki/Mata_Hari), one of the most
8
+ popular spies.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'meta_hari'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install meta_hari
25
+
26
+ ## Usage
27
+
28
+ In order to retrieve product information, just pass the URL containing
29
+ that information to the method `MetaHari.spy`.
30
+
31
+ ```ruby
32
+ product = MetaHari.spy('http://www.amazon.de/Gastroback-42429-Design-Wasserkocher-Advanced/dp/B000LQXC2Q/ref=sr_1_1')
33
+ product.inspect # => #<OpenStruct name="Gastroback 42429 Design Wasserkocher Advanced Pro", image="http://ecx.images-amazon.com/images/I/814Yl6mxLsL._SL1500_.jpg", description="">
34
+ ```
35
+
36
+ ## Implemented spyglasses
37
+
38
+ A spyglass is a support class for a specific shop. In order to support
39
+ custom shops which cannot be spied on by the generic spyglass
40
+ (`MetaHari::Spyglass::Base`), a new spyglass has to be created.
41
+
42
+ * Amazon DE
43
+ * Generic
44
+ * Shops using [JSON-LD](https://developers.google.com/structured-data/rich-snippets/products)
45
+
46
+ ### Creating a spyglass
47
+
48
+ A spyglass has to be a class within the namespace `MetaHari::Spyglass`
49
+ and must extend the class `MetaHari::Spyglass::Base`. It has to implement
50
+ the methods `self.suitable?` and `spy`.
51
+
52
+ ```ruby
53
+ module MetaHari
54
+ module Spyglass
55
+ class AmazonDe < Base
56
+ def self.suitable?(uri)
57
+ %w(amazon.de www.amazon.de).include? uri.host.downcase
58
+ end
59
+
60
+ def spy
61
+ OpenStruct.new(name: title, image: image, description: '')
62
+ end
63
+
64
+ protected
65
+
66
+ def title
67
+ document.css('#productTitle').text
68
+ end
69
+
70
+ def image
71
+ data = document.css('img#landingImage')
72
+ data &&= data.attr 'data-old-hires'
73
+ data && data.value
74
+ end
75
+ end
76
+ end
77
+ end
78
+ ```
79
+
80
+ ## Contributing
81
+
82
+ 1. Fork it ( https://github.com/spieker/meta_hari/fork )
83
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
84
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
85
+ 4. Push to the branch (`git push origin my-new-feature`)
86
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new
5
+
6
+ task default: :spec
7
+ task test: :spec
8
+
9
+ desc 'Open an irb session preloaded with this library'
10
+ task :console do
11
+ sh 'irb -rubygems -I lib -r meta_hari.rb'
12
+ end
@@ -0,0 +1,29 @@
1
+ module MetaHari
2
+ module Helpers
3
+ class JsonLd
4
+ attr_reader :document
5
+
6
+ def initialize(document)
7
+ @document = document
8
+ end
9
+
10
+ def data(type = 'Product')
11
+ (json['@type'] == type) ? json : {}
12
+ end
13
+
14
+ protected
15
+
16
+ def selector
17
+ 'script[type="application/ld+json"]'
18
+ end
19
+
20
+ def json
21
+ @json ||= begin
22
+ script = document.css(selector).first
23
+ return {} unless script
24
+ JSON.parse! script
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,6 @@
1
+ require 'meta_hari/helpers/json_ld'
2
+
3
+ module MetaHari
4
+ module Helpers
5
+ end
6
+ end
@@ -0,0 +1,25 @@
1
+ module MetaHari
2
+ module Spyglass
3
+ class AmazonDe < Base
4
+ def self.suitable?(uri)
5
+ %w(amazon.de www.amazon.de).include? uri.host.downcase
6
+ end
7
+
8
+ def spy
9
+ OpenStruct.new(name: title, image: image, description: '')
10
+ end
11
+
12
+ protected
13
+
14
+ def title
15
+ document.css('#productTitle').text
16
+ end
17
+
18
+ def image
19
+ data = document.css('img#landingImage')
20
+ data &&= data.attr 'data-old-hires'
21
+ data && data.value
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,88 @@
1
+ require 'ostruct'
2
+
3
+ module MetaHari
4
+   module Spyglass
5
+     # Generic spyglass. It downloads the page behind a URI and builds the
6
+     # product from the JSON-LD data found in it. Shop specific spyglasses
7
+     # inherit from this class and override .suitable? and #spy.
8
+     class Base
9
+       attr_reader :uri
10
+
11
+       # Shop specific subclasses report here whether they can handle +uri+.
12
+       # The generic class gives no answer and always raises StandardError.
13
+       def self.suitable?(uri)
14
+         fail StandardError, "not implemented for '#{uri.host}'"
15
+       end
16
+
17
+       # uri - URI of the page to spy on
18
+       def initialize(uri)
19
+         @uri = uri
20
+       end
21
+
22
+       # Returns an OpenStruct built from the merged hashes of all data
23
+       # sources (currently JSON-LD only).
24
+       def spy
25
+         OpenStruct.new [
26
+           spy_json_ld
27
+         ].inject({}) { |a, e| a.merge e }
28
+       end
29
+
30
+       protected
31
+
32
+       # User-Agent header value sent with every request.
33
+       def user_agent
34
+         [
35
+           'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5)',
36
+           'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125',
37
+           'Safari/537.36'
38
+         ].join(' ')
39
+       end
40
+
41
+       # Builds the GET request for +target+. The query string is kept so the
42
+       # server receives the URL as it was given.
43
+       def fetch_request(target = uri)
44
+         path = target.path.empty? ? '/' : target.path
45
+         path = "#{path}?#{target.query}" if target.query
46
+         Net::HTTP::Get.new path, 'User-Agent' => user_agent
47
+       end
48
+
49
+       # Performs the request for +target+, using TLS for https URIs.
50
+       def fetch_response(target = uri)
51
+         use_ssl = target.scheme == 'https'
52
+         Net::HTTP.start target.host, target.port, use_ssl: use_ssl do |http|
53
+           http.request fetch_request(target)
54
+         end
55
+       end
56
+
57
+       # Returns the memoized body of the page at +target+, following
58
+       # redirects. +limit+ is the number of requests that may still be made.
59
+       #
60
+       # Raises ArgumentError when +limit+ is used up and whatever
61
+       # Net::HTTPResponse#error! raises for other unsuccessful responses.
62
+       def fetch_data(limit = 10, target = uri)
63
+         return @_data if @_data
64
+         fail ArgumentError, 'HTTP redirect too deep' if limit == 0
65
+         case res = fetch_response(target)
66
+         when Net::HTTPSuccess then @_data = res.body
67
+         when Net::HTTPRedirection
68
+           # A redirect without a Location header cannot be followed.
69
+           res.error! unless res['location']
70
+           # Location may be relative, so resolve it against the current URI.
71
+           fetch_data limit - 1, URI.join(target.to_s, res['location'])
72
+         else res.error!
73
+         end
74
+       end
75
+
76
+       # Parsed HTML document of the fetched page (memoized).
77
+       def document
78
+         @document ||= Nokogiri::HTML fetch_data
79
+       end
80
+
81
+       # Data returned by the JSON-LD helper for the fetched document.
82
+       def spy_json_ld
83
+         json_ld = MetaHari::Helpers::JsonLd.new(document)
84
+         json_ld.data
85
+       end
86
+     end
87
+   end
88
+ end
@@ -0,0 +1,7 @@
1
+ require 'meta_hari/spyglass/base'
2
+ require 'meta_hari/spyglass/amazon_de'
3
+
4
+ module MetaHari
5
+ module Spyglass
6
+ end
7
+ end
@@ -0,0 +1,4 @@
1
+ # MetaHari version
2
+ module MetaHari
3
+ VERSION = '0.0.1'
4
+ end
data/lib/meta_hari.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'nokogiri'
2
+ require 'json'
3
+ require 'uri'
4
+ require 'net/http'
5
+ require 'meta_hari/version'
6
+ require 'meta_hari/helpers'
7
+ require 'meta_hari/spyglass'
8
+
9
+ # MetaHary will find product informations for a given product link. The
10
+ # information will be wrapped into an OpenStruct.
11
+ #
12
+ # Example
13
+ # =======
14
+ #
15
+ # ```ruby
16
+ # product = MetaHari.spy('http://example.com/product.html')
17
+ # ```
18
+ module MetaHari
19
+ class <<self
20
+ def spy(url)
21
+ uri = URI.parse url
22
+ spyglass = suitable_spyglass_instance uri
23
+ spyglass.spy
24
+ end
25
+
26
+ private
27
+
28
+ def suitable_spyglass_instance(uri)
29
+ klass = find_suitable_spyglass(uri)
30
+ klass.new(uri)
31
+ end
32
+
33
+ # Finding a suitable spyglass for the given URL. If no suitable spyglass
34
+ # is found, the default spyglass (MetaHari::Spyglass::Base) is returned.
35
+ #
36
+ def find_suitable_spyglass(uri)
37
+ spyglasses = MetaHari::Spyglass.constants.map do |c|
38
+ MetaHari::Spyglass.const_get(c)
39
+ end
40
+ spyglasses.select! { |spyglass| spyglass < MetaHari::Spyglass::Base }
41
+ suitable_spyglass = spyglasses.find { |spyglass| spyglass.suitable? uri }
42
+ suitable_spyglass || MetaHari::Spyglass::Base
43
+ end
44
+ end
45
+ end
data/meta_hari.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'meta_hari/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'meta_hari'
8
+ spec.version = MetaHari::VERSION
9
+ spec.authors = ['Paul Spieker']
10
+ spec.email = ['p.spieker@duenos.de']
11
+ spec.summary = %q{Receiving product informations from a given link.}
12
+ spec.homepage = 'https://github.com/spieker/meta_hari'
13
+ spec.license = 'MIT'
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ['lib']
19
+
20
+ spec.add_development_dependency 'bundler', '~> 1.7'
21
+ spec.add_development_dependency 'rake', '~> 10.0'
22
+ spec.add_development_dependency 'rspec', '~> 3.2.0'
23
+ spec.add_development_dependency 'guard', '~> 2.12.5'
24
+ spec.add_development_dependency 'guard-rspec', '~> 4.5.0'
25
+ spec.add_development_dependency 'rubocop', '~> 0.31.0'
26
+ spec.add_development_dependency 'guard-rubocop', '~> 1.2.0'
27
+ spec.add_development_dependency 'pry', '~> 0.10.1'
28
+ spec.add_development_dependency 'nokogiri', '~> 1.6.6'
29
+ end
@@ -0,0 +1,74 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaHari::Helpers::JsonLd do
4
+ let(:html) { resource_content('json_ld_example.html') }
5
+ let(:document) { Nokogiri::HTML html }
6
+ subject { described_class.new document }
7
+
8
+ describe '#initialize' do
9
+ it 'assigns the document to the reader' do
10
+ expect(subject.document).to be document
11
+ end
12
+ end
13
+
14
+ context 'when containing JSON-LD' do
15
+ describe '#json' do
16
+ subject { described_class.new(document).send :json }
17
+
18
+ it { should be_a Hash }
19
+ it { should have_key '@context' }
20
+ it { should have_key '@type' }
21
+ it { should have_key 'name' }
22
+ it { should have_key 'image' }
23
+ it { should have_key 'description' }
24
+
25
+ it 'is a product' do
26
+ expect(subject['@type']).to eql 'Product'
27
+ end
28
+ end
29
+
30
+ describe '#data' do
31
+ it 'uses #json' do
32
+ expect(subject).to receive(:json).and_return({})
33
+ subject.data
34
+ end
35
+
36
+ context 'when type matches' do
37
+ subject { described_class.new(document).data }
38
+
39
+ it { should be_a Hash }
40
+ it { should have_key '@context' }
41
+ it { should have_key '@type' }
42
+ it { should have_key 'name' }
43
+ it { should have_key 'image' }
44
+ it { should have_key 'description' }
45
+ end
46
+
47
+ context 'when type does not match' do
48
+ subject { described_class.new(document).data('Something else') }
49
+
50
+ it { should be_a Hash }
51
+ it { should_not have_key '@context' }
52
+ it { should_not have_key '@type' }
53
+ it { should_not have_key 'name' }
54
+ it { should_not have_key 'image' }
55
+ it { should_not have_key 'description' }
56
+ end
57
+ end
58
+ end
59
+
60
+ context 'when not containing JSON-LD' do
61
+ let(:html) { '' }
62
+
63
+ describe '#json' do
64
+ subject { described_class.new(document).send :json }
65
+
66
+ it { should be_a Hash }
67
+ it { should_not have_key '@context' }
68
+ it { should_not have_key '@type' }
69
+ it { should_not have_key 'name' }
70
+ it { should_not have_key 'image' }
71
+ it { should_not have_key 'description' }
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaHari::Spyglass::AmazonDe do
4
+ let(:uri) { URI.parse 'http://www.amazon.de/dp/B000LQXC2Q/ref=sr_1_1' }
5
+ let(:html) { '' }
6
+ let(:instance) { described_class.new uri }
7
+ subject { instance }
8
+
9
+ before :each do
10
+ allow(instance).to receive(:fetch_data).and_return(html)
11
+ end
12
+
13
+ it 'is suitable for amazon.de' do
14
+ uri = URI.parse 'http://amazon.de'
15
+ expect(described_class.suitable? uri).to be true
16
+ end
17
+
18
+ it 'is suitable for www.amazon.de' do
19
+ uri = URI.parse 'http://www.amazon.de'
20
+ expect(described_class.suitable? uri).to be true
21
+ end
22
+
23
+ context 'with valid amazn product page' do
24
+ let(:html) { resource_content 'amazon_de.html' }
25
+
26
+ it 'extracts the correct title' do
27
+ expected_value = 'Gastroback 42429 Design Wasserkocher Advanced Pro'
28
+ expect(subject.send :title).to eql expected_value
29
+ end
30
+
31
+ it 'extracts the correct image' do
32
+ expected_value =
33
+ 'http://ecx.images-amazon.com/images/I/814Yl6mxLsL._SL1500_.jpg'
34
+ expect(subject.send :image).to eql expected_value
35
+ end
36
+ end
37
+
38
+ describe '#spy' do
39
+ let(:html) { resource_content 'amazon_de.html' }
40
+ subject { instance.spy }
41
+
42
+ it { should be_an OpenStruct }
43
+ it { should respond_to :name }
44
+ it { should respond_to :image }
45
+ it { should respond_to :description }
46
+ end
47
+ end