bataille 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
3
+
4
+ gem 'nokogiri'
5
+
6
+ group :test do
7
+ gem 'webmock'
8
+ gem 'rspec'
9
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 kotohata
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,65 @@
1
+ # Bataille
2
+
3
+ Bataille is a smart custom web searcher and analyzer for SEO.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'bataille'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install bataille
18
+
19
+ ## Usage
20
+
21
+
22
+ ### Search
23
+ - - -
24
+
25
+ results = Bataille::Search.google_search("test") #=> #<Bataille::SearchResult>
26
+
27
+ site = results.first #=> #<Bataille::Site>
28
+ site.url #=> "test.jp/"
29
+ site.title #=> "test title"
30
+ site.description #=> "test description"
31
+ site.rank #=> 1 //search result page ranking
32
+
33
+ Or you can specify the number of results; the default is 10 and the maximum is 50.
34
+
35
+ result = Bataille::Search.google_search("test", 20)
36
+ result.length # 20
37
+
38
+ ---
39
+ You can use finders on a search result:
40
+
41
+ results = Bataille::Search.google_search("test")
42
+ results.where(title: "hoge")
43
+ results.where(description: "fuga")
44
+ results.where(url: /com$/)
45
+ # you can chain finders
46
+ results.where(title: "hoge").where(url: /com$/)
47
+ # or
48
+ results.where({title: "hoge", url: /com$/})
49
+
50
+ Bataille::SearchResult#where returns another Bataille::SearchResult instance.
51
+
52
+ If you want only the first matching result, use `#find_by`:
53
+
54
+ results.find_by(:url, /com$/) #=> #<Bataille::Site>
55
+
56
+
57
+
58
+
59
+ ## Contributing
60
+
61
+ 1. Fork it
62
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
63
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
64
+ 4. Push to the branch (`git push origin my-new-feature`)
65
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bataille.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'bataille/version'
5
+
6
# Gem specification for bataille.
#
# Changes from the released spec:
# * fixed the "utilify" typo in the summary,
# * declared the MIT license that LICENSE.txt ships,
# * declared nokogiri as a runtime dependency, because
#   Bataille::Search parses result pages with Nokogiri.
Gem::Specification.new do |gem|
  gem.name          = "bataille"
  gem.version       = Bataille::VERSION
  gem.authors       = ["kotohata"]
  gem.email         = ["t.kotohata@gmail.com"]
  gem.description   = %q{Bataille is custom web searcher and utility for SEO}
  gem.summary       = %q{custom web searcher and utility for SEO}
  gem.homepage      = ""
  gem.license       = "MIT"

  # The packaged file list is whatever git tracks at build time.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "nokogiri"
end
@@ -0,0 +1,13 @@
1
+ # coding: utf-8
2
module Bataille
  # Looks up the position of one site inside a search result.
  class Analytics
    # search_result - enumerable of entries responding to #url and #rank.
    # site          - object responding to #url; the site to look for.
    def initialize(search_result, site)
      @search_result = search_result
      @site = site
    end

    # Returns the rank of the first entry whose url equals the site's url,
    # or nil when the site does not appear in the search result (the
    # previous implementation raised NoMethodError in that case).
    def rate
      wanted_url = @site.url
      hit = @search_result.find { |entry| entry.url == wanted_url }
      hit && hit.rank
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
module Bataille
  # A named list of search words.
  class Category
    attr_accessor :words, :name

    # name  - label of the category.
    # words - search words belonging to the category (defaults to none).
    def initialize(name, words=[])
      @name = name
      @words = words
    end

    # Combines two categories: every word of the receiver is paired with
    # every word of +category+ (joined by a single space) and the new
    # category is named "<receiver name> x <other name>".
    def &(category)
      paired_words = words.flat_map do |left|
        category.words.map { |right| [left, right].join(" ") }
      end
      self.class.new("#{@name} x #{category.name}", paired_words)
    end

    # Runs Search.google_search once per word with the given limit and
    # wraps the per-word results in a ResultSet labelled with this
    # category's name.
    def google_search(limit=10)
      per_word_results = words.map { |word| Search.google_search(word, limit) }
      ResultSet.new(per_word_results, @name)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # coding: utf-8
2
module Bataille
  # Enumerable collection of per-word search results, labelled with the
  # name of the category that produced them.
  class ResultSet
    include Enumerable

    attr_accessor :category

    # results  - collection of results; iterated with #each.
    # category - label for the set (Category#google_search passes the
    #            category name). It used to be discarded, which left
    #            #category always nil.
    def initialize(results, category)
      @results = results
      @category = category
    end

    # Yields each stored result. Returns an Enumerator when called
    # without a block.
    def each(&block)
      return enum_for(:each) unless block_given?
      @results.each(&block)
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # coding: utf-8
2
+ require 'open-uri'
3
+
4
module Bataille
  # Fetches Google result pages and turns them into Site objects.
  #
  # NOTE: Nokogiri is used below but is not required by this file; it has
  # to be loaded before .fetch_result is called.
  class Search
    SEARCH_URL_PREFIX = "http://www.google.co.jp/search"
    MAX_SEARCH_LIMIT = 50
    # Page size assumed when computing the "start" offsets.
    RESULTS_PER_PAGE = 10

    # Raised when the requested number of results is not an Integer
    # between 1 and MAX_SEARCH_LIMIT.
    class LimitError < StandardError; end

    class << self

      # Searches for +word+ and returns a SearchResult holding at most
      # +limit+ sites (fewer when the fetched pages contain fewer entries).
      #
      # Exactly fetch_times_for(limit) pages are requested; the previous
      # implementation iterated 0.upto(n) and fetched one page too many.
      #
      # Raises LimitError unless limit is an Integer in 1..MAX_SEARCH_LIMIT.
      def google_search(word, limit=10)
        validate_limit!(limit)

        results = (0...fetch_times_for(limit)).flat_map do |page|
          fetch_result(word, page * RESULTS_PER_PAGE)
        end

        SearchResult.new(results.first(limit), word)
      end

      # Downloads one result page for +word+ beginning at offset +start+
      # and returns an Array of Site, ranked from start + 1.
      def fetch_result(word, start=0)
        charset = nil
        # URI#open comes from open-uri and works on every Ruby version,
        # unlike Kernel#open on a URL string (removed in Ruby 3.0).
        html = URI.parse(search_url(word, start)).open do |f|
          charset = f.charset
          f.read
        end
        doc = Nokogiri::HTML.parse(html, nil, charset)
        offset = start.to_i
        # Each ".g" node is one hit; title, url and description are read
        # from its ".r", ".kv cite" and ".st" descendants.
        doc.css('.g').each_with_index.map do |result, i|
          Site.new(
            rank: i + 1 + offset,
            keyword: word,
            title: result.css('.r').text,
            url: result.css('.kv').search('cite').text,
            description: result.css('.st').text
          )
        end
      end

      private

      # Raises LimitError unless +limit+ is an Integer within
      # 1..MAX_SEARCH_LIMIT. Non-numeric limits used to escape as
      # NoMethodError/ArgumentError from the comparison.
      def validate_limit!(limit)
        return if limit.is_a?(Integer) && (1..MAX_SEARCH_LIMIT).include?(limit)

        too_many = limit.is_a?(Integer) && limit > MAX_SEARCH_LIMIT
        raise LimitError, too_many ? "search results should be under 50" : "something wrong with limit parameter"
      end

      # Builds the search URL. Parameters are form-encoded so that words
      # containing "&", "=", spaces or non-ASCII characters cannot corrupt
      # the query string (URI.encode left "&" and "=" untouched and no
      # longer exists in Ruby 3.0+). "start" is omitted when nil.
      def search_url(word, start)
        params = [["q", word]]
        params << ["start", start] if start
        "#{SEARCH_URL_PREFIX}?#{URI.encode_www_form(params)}"
      end

      # Number of pages needed for +n+ results (n / 10, rounded up).
      def fetch_times_for(n)
        (n - 1) / RESULTS_PER_PAGE + 1
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # coding: utf-8
2
module Bataille
  # Enumerable list of sites returned for one search word, with simple
  # finder helpers.
  class SearchResult
    include Enumerable

    attr_accessor :results, :word

    # results - Array of site objects (responding to #url, #title, ...).
    # word    - the search word that produced them.
    def initialize(results, word)
      @results, @word = results, word
    end

    # Number of sites held.
    def length
      @results.length
    end

    # Yields each site. Returns an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block_given?
      @results.each(&block)
    end

    # Returns a new SearchResult with the first +n+ sites and the same
    # word. The previous version called the two-argument constructor with
    # one argument (always ArgumentError), and its [0..n-1] slice would
    # have returned every site for n == 0.
    def limit(n=1)
      self.class.new(@results.first(n), @word)
    end
    alias :top :limit

    # Returns a new SearchResult containing the sites that satisfy ALL
    # +conditions+ (a Hash of attribute => String, Regexp or Site).
    # Each condition narrows the outcome of the previous one; before,
    # only the last condition had any effect.
    #
    # opt - :perfect  => true for exact string equality instead of
    #                    substring matching (ignored for Regexp values),
    #       :protocol => "http" or "https" to filter by URL scheme.
    def where(conditions, opt={})
      matched = conditions.inject(@results) do |sites, (key, value)|
        filter(sites, key, value, opt)
      end
      Bataille::SearchResult.new(matched, @word)
    end

    # Returns the first site whose +match+ attribute matches +site+
    # (String, Regexp or Site), or nil. Accepts the same options as #where.
    def find_by(match, site, opt={})
      finder(match, site, opt).first
    end

    private

    # Array of all held sites matching a single condition.
    def finder(match, site, opt={})
      filter(@results, match, site, opt)
    end

    # Selects from +sites+ the entries whose +match+ attribute matches
    # +site+.
    #
    # A Bataille::Site contributes its own +match+ attribute as the target;
    # a String or Regexp is used directly. Anything else raises
    # ArgumentError.
    def filter(sites, match, site, opt)
      target =
        case site
        when Bataille::Site then site.public_send(match)
        when String, Regexp then site
        else
          raise ArgumentError, "expected a Bataille::Site, String or Regexp, got #{site.class}"
        end

      # Regexp targets ignore opt[:perfect]; strings match by substring
      # unless opt[:perfect] is set. to_s keeps nil attributes from
      # raising NoMethodError.
      matched =
        if target.is_a?(Regexp)
          sites.select { |x| x.public_send(match).to_s =~ target }
        elsif opt[:perfect]
          sites.select { |x| x.public_send(match) == target }
        else
          sites.select { |x| x.public_send(match).to_s.include?(target) }
        end

      filter_by_protocol(matched, opt[:protocol])
    end

    # Narrows +sites+ by URL scheme. "https" keeps URLs written with an
    # explicit https scheme. "http" keeps URLs without any scheme (the
    # original rule) and also those with an explicit http scheme. Any
    # other value leaves the list untouched.
    def filter_by_protocol(sites, protocol)
      case protocol
      when "https"
        sites.select { |x| scheme_of(x.url) == "https" }
      when "http"
        sites.select { |x| [nil, "http"].include?(scheme_of(x.url)) }
      else
        sites
      end
    end

    # Lower-cased scheme of +url+ ("http", "https", ...) or nil when the
    # text does not start with "<scheme>://". Done with a regexp so that
    # display URLs which are not valid URIs cannot raise, and without
    # URI.encode, which no longer exists in Ruby 3.0+.
    def scheme_of(url)
      found = url.to_s.match(%r{\A\s*([A-Za-z][A-Za-z0-9+.\-]*)://})
      found && found[1].downcase
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # coding: utf-8
2
module Bataille
  # One entry of a search result page.
  class Site
    # Attributes accepted by #word_count.
    COUNTABLE_ATTRIBUTES = [:title, :url, :description].freeze

    attr_accessor :rank, :title, :url, :description, :keyword

    # The accessor used to be declared as :word while the value was stored
    # in @keyword, so the search word was never readable. #word/#word= are
    # kept as aliases of #keyword/#keyword=.
    alias_method :word, :keyword
    alias_method :word=, :keyword=

    # args - Hash with optional keys :rank, :title, :url, :description and
    #        :keyword (:word is accepted as a synonym for :keyword).
    def initialize(args={})
      @rank, @title, @url, @description =
        args[:rank], args[:title], args[:url], args[:description]
      @keyword = args[:keyword] || args[:word]
    end

    # Counts case-insensitive occurrences of the keyword in the given
    # attribute (:title, :url or :description, Symbol or String).
    #
    # The keyword is matched literally: it is regexp-escaped, so words
    # such as "c++" no longer raise RegexpError. Returns 0 when the
    # keyword or the attribute is blank.
    #
    # Raises ArgumentError for any other attribute.
    def word_count(attr)
      unless attr.respond_to?(:to_sym) && COUNTABLE_ATTRIBUTES.include?(attr.to_sym)
        raise ArgumentError, "attr must be one of #{COUNTABLE_ATTRIBUTES.inspect}"
      end

      needle = @keyword.to_s
      return 0 if needle.empty?

      public_send(attr).to_s.scan(/#{Regexp.escape(needle)}/i).length
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bataille
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/bataille.rb ADDED
@@ -0,0 +1,10 @@
1
+ require_relative "bataille/version"
2
+ require_relative 'bataille/site'
3
+ require_relative 'bataille/search'
4
+ require_relative 'bataille/category'
5
+ require_relative 'bataille/search_result'
6
+ require_relative 'bataille/result_set'
7
+ require_relative 'bataille/analytics'
8
+
9
+ module Bataille
10
+ end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Bataille::Analytics do
5
+ include_context 'make 10 sites'
6
+ let(:search_result) { Bataille::SearchResult.new(sites, "hoge") }
7
+ let(:site) { Bataille::Site.new(url: "http://url1.jp") }
8
+ subject { search_result.analyze(site) }
9
+
10
+ describe '#rate' do
11
+ end
12
+ end
@@ -0,0 +1,21 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Bataille::Category do
5
+ let(:category1) { Bataille::Category.new("animal", [ "cat", "dog"] ) }
6
+ let(:category2) { Bataille::Category.new("country", [ "japan", "america" ]) }
7
+ subject { category1 & category2 }
8
+
9
+ describe "#&" do
10
+ its(:words) { should =~ ["cat japan", "dog japan", "cat america", "dog america"] }
11
+ its(:name) { should eq "animal x country" }
12
+ end
13
+
14
+ describe '#google_search' do
15
+ include_context 'make 10 sites'
16
+ before { stub_response_for(sites) }
17
+ its('google_search') { should have(4).results }
18
+ its('google_search') { should be_instance_of(Bataille::ResultSet) }
19
+ its('google_search.category') { should eq "animal x country" }
20
+ end
21
+ end
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Bataille::SearchResult do
5
+ include_context 'make 10 sites'
6
+ subject { Bataille::SearchResult.new(sites, "hoge") }
7
+
8
+ describe '#find_by' do
9
+ context 'when the matching record exists' do
10
+ let(:site) { Bataille::Site.new(url: "http://url1.jp") }
11
+ it { subject.find_by(:url, site).should be_instance_of(Bataille::Site) }
12
+ end
13
+
14
+ context 'when the matching record does not exist' do
15
+ let(:site) { Bataille::Site.new(url: "http://url11.jp") }
16
+ it { subject.find_by(:url, site).should_not be }
17
+ end
18
+
19
+ context 'when target argument is passed with regexp' do
20
+ it { subject.find_by(:url, /jp$/).should be }
21
+ it { subject.find_by(:url, /com$/).should_not be }
22
+ end
23
+
24
+ context 'when target argument is passed with string' do
25
+ it { subject.find_by(:url, "jp").should be }
26
+ it { subject.find_by(:url, "com").should_not be }
27
+ it { subject.find_by(:url, "jp", perfect: true).should_not be }
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Bataille::Search do
5
+
6
+ include_context 'make 10 sites'
7
+
8
+ before { stub_response_for(sites) }
9
+
10
+ describe '.google_search' do
11
+ context 'when limit is over 50' do
12
+ it 'should raise limit error' do
13
+ expect {
14
+ Bataille::Search.google_search('hoge', 100)
15
+ }.to raise_error(Bataille::Search::LimitError)
16
+ end
17
+ end
18
+
19
+ context 'limit is specified' do
20
+ subject { Bataille::Search.google_search('hoge', 20) }
21
+ it { should have(20).results }
22
+ its('word') { should eq 'hoge' }
23
+ end
24
+
25
+ context 'limit is not specified' do
26
+ subject { Bataille::Search.google_search('hoge') }
27
+ it { should have(10).results }
28
+ end
29
+ end
30
+
31
+ describe '.fetch_result' do
32
+ subject { Bataille::Search.fetch_result('hoge') }
33
+ it { should have(10).site }
34
+ its('first') { should be_instance_of(Bataille::Site) }
35
+ its('first.url') { should eq "http://url1.jp" }
36
+ its('last.url') { should eq "http://url10.jp" }
37
+ end
38
+
39
+ describe '.fetch_times_for' do
40
+ it 'should round up to the nearest whole number' do
41
+ Bataille::Search.send(:fetch_times_for, 3).should eq 1
42
+ Bataille::Search.send(:fetch_times_for, 10).should eq 1
43
+ Bataille::Search.send(:fetch_times_for, 21).should eq 3
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,15 @@
1
+ shared_context 'make 10 sites' do
2
+ let!(:sites) do
3
+ 10.times.map do |n|
4
+ double(
5
+ Bataille::Site,
6
+ title: "Title#{n+1}",
7
+ url: "http://url#{n+1}.jp",
8
+ description: "Description#{n+1}",
9
+ rank: n+1
10
+ ).tap do |d|
11
+ d.stub(:instance_of?, with: Bataille::Site).and_return true
12
+ end
13
+ end
14
+ end
15
+ end
data/spec/site_spec.rb ADDED
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Bataille::Site do
5
+ subject do
6
+ Bataille::Site.new(
7
+ rank: 1,
8
+ title: "Test Title Test test",
9
+ url: "http://test.com",
10
+ description: "test test this is test",
11
+ keyword: "test"
12
+ )
13
+ end
14
+
15
+ describe '#word_count' do
16
+ it { subject.word_count(:description).should eq 3 }
17
+ it { subject.word_count(:title).should eq 3 }
18
+ it { subject.word_count(:url).should eq 1 }
19
+
20
+ it "should raise argument error" do
21
+ expect {
22
+ subject.word_count(:hoge)
23
+ }.to raise_error(ArgumentError)
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,24 @@
1
+ require 'bataille'
2
+ require 'webmock/rspec'
3
+ require 'shared_context'
4
+
5
# Builds the HTML fragment used as a fake search response: one
# <resultN class="g"> element per site, carrying the title (.r), the url
# (.kv > cite) and the description (.st).
#
# Returns "" for an empty list; the previous inject(:+) returned nil,
# which broke the string concatenation in stub_response_for.
def build_dom_for(sites)
  sites.map do |site|
    <<-CONTENT
      <result#{site.rank} class="g">
        <div class="r">#{site.title}</div>
        <div class="kv">
          <cite>#{site.url}</cite>
        </div>
        <div class="st">#{site.description}</div>
      </result#{site.rank}>
    CONTENT
  end.join
end
18
+
19
# Stubs every GET to the search URL that carries "q" and "start"
# parameters so that it answers with a page built from +sites+.
#
# The URL prefix is regexp-escaped so that its dots match literally
# instead of matching any character.
def stub_response_for(sites)
  prefix = Regexp.escape(Bataille::Search::SEARCH_URL_PREFIX)
  stub_request(:get, %r!#{prefix}\?q=[^&]*&start=[^&]*!).
    to_return(
      body: "<html><body>#{build_dom_for(sites)}</body></html>"
    )
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bataille
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - kotohata
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Bataille is custom web searcher and utility for SEO
15
+ email:
16
+ - t.kotohata@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - LICENSE.txt
24
+ - README.md
25
+ - Rakefile
26
+ - bataille.gemspec
27
+ - lib/bataille.rb
28
+ - lib/bataille/analytics.rb
29
+ - lib/bataille/category.rb
30
+ - lib/bataille/result_set.rb
31
+ - lib/bataille/search.rb
32
+ - lib/bataille/search_result.rb
33
+ - lib/bataille/site.rb
34
+ - lib/bataille/version.rb
35
+ - spec/analytics_spec.rb
36
+ - spec/category_spec.rb
37
+ - spec/search_result_spec.rb
38
+ - spec/search_spec.rb
39
+ - spec/shared_context.rb
40
+ - spec/site_spec.rb
41
+ - spec/spec_helper.rb
42
+ homepage: ''
43
+ licenses: []
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.24
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: custom web searcher and utility for SEO
66
+ test_files:
67
+ - spec/analytics_spec.rb
68
+ - spec/category_spec.rb
69
+ - spec/search_result_spec.rb
70
+ - spec/search_spec.rb
71
+ - spec/shared_context.rb
72
+ - spec/site_spec.rb
73
+ - spec/spec_helper.rb