seotracker 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- coderay (1.0.5)
5
- domain_name (0.5.2)
4
+ coderay (1.0.6)
5
+ domain_name (0.5.3)
6
6
  unf (~> 0.0.3)
7
7
  git (1.2.5)
8
8
  jeweler (1.8.3)
@@ -21,7 +21,7 @@ GEM
21
21
  webrobots (~> 0.0, >= 0.0.9)
22
22
  method_source (0.7.1)
23
23
  mime-types (1.18)
24
- minitest (2.11.4)
24
+ minitest (2.12.1)
25
25
  net-http-digest_auth (1.2)
26
26
  net-http-persistent (2.6)
27
27
  nokogiri (1.5.2)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
3
  require 'logger'
4
+ #require 'pry'
4
5
 
5
6
  class Seotracker
6
7
  USER_AGENT = 'Mac Safari'
@@ -20,18 +21,18 @@ class Seotracker
20
21
  end
21
22
  end
22
23
 
23
- def get_position(site, word, pages = 200)
24
+ def get_position(site, word, region = Seotracker::Yandex::MOSCOW, pages = 200)
24
25
  pos, found, start = 0, false, 0
25
26
  while (start < pages) && !found
26
- links = parse(word, start)
27
+ links = parse(word, start, region)
27
28
  start += RESULTS
28
29
 
29
30
  break if links == 'error'
30
-
31
31
  links.each do |l|
32
- pos += 1
33
32
  href = l.attribute('href').value.downcase
34
- unless href.rindex(site).nil?
33
+ # убеждаемся, что это точно ссылка. а то бывает еще адрес
34
+ pos += 1 if l.content.match(/.+\..+/)
35
+ if href.rindex(site) && same_level(href, site)
35
36
  found = true
36
37
  break
37
38
  end
@@ -43,6 +44,17 @@ class Seotracker
43
44
  def debug(message)
44
45
  @log.debug(message) if @debug
45
46
  end
47
+
48
+ protected
49
+
50
+ # убеждаемся, что сайт, для которого определяем позицию (site)
51
+ # и найденная ссылка (href) на одном и том же уровне
52
+ # например, если искали позицию ya.ru
53
+ # а сначала нашлась позиция maps.ya.ru то ее не считаем
54
+ def same_level(href, site)
55
+ return href.count('.') - 1 == site.count('.') if href.index('www')
56
+ href.count('.') == site.count('.')
57
+ end
46
58
  end
47
59
 
48
60
  require 'seotracker/yandex'
@@ -3,7 +3,7 @@ class Seotracker::Google < Seotracker
3
3
 
4
4
  protected
5
5
 
6
- def parse(word, start = 0)
6
+ def parse(word, start = 0, region = nil)
7
7
  page = @agent.get(SEARCH_URL, q: word, start: start)
8
8
  page.root.xpath('/html/body/div[5]/div/div/div[4]/div[2]/div[2]/div/div[2]/div/ol/li/div/h3/a')
9
9
  end
@@ -2,33 +2,35 @@ class Seotracker::Yandex < Seotracker
2
2
  SEARCH_URL = 'http://yandex.ru/yandsearch?'
3
3
  WORDSTAT_URL = 'http://wordstat.yandex.ru/?cmd=words&page=1&&geo=&text_geo=&text='
4
4
 
5
+ MOSCOW = 213
6
+
5
7
  # получаем стастистику wordstat
6
8
  # word - слово, по которому получаем статистику
7
9
  def get_wordstat(word)
8
10
  url = WORDSTAT_URL + word
9
- get_cookie if @cookie.nil?
10
- page = @agent.get(url, [], nil, {'cookie' => @cookie})
11
+ @cookie || get_cookie
12
+ page = @agent.get(url, [], nil, { 'cookie' => @cookie })
11
13
  res = page.root.xpath('/html/body/form/table[2]/tbody/tr/td[4]/table/tbody/tr[3]/td/table/tbody/tr[2]/td[3]')
12
14
  begin
13
15
  res.first.content
14
16
  rescue Exception => e
15
- debug "can't get yandex wordstat:" + e.message
17
+ debug "can't get yandex wordstat: #{e.message}"
16
18
  0
17
19
  end
18
20
  end
19
21
 
20
22
  protected
21
23
 
22
- def parse(word, start = 0)
24
+ # начинаем парсить с первой страницы, регион по умолчанию - Москва
25
+ def parse(word, start = 0, region = nil)
23
26
  start /= 10
24
- get_cookie if @cookie.nil?
25
- url = SEARCH_URL + "text=#{word}&p=#{start}"
26
- page = @agent.get(url, [], nil, {'cookie' => @cookie})
27
-
27
+ @cookie || get_cookie
28
+ url = SEARCH_URL + "text=#{word}&p=#{start}&lr=#{region}"
29
+ page = @agent.get(url, [], nil, { 'cookie' => @cookie })
28
30
  begin
29
- page.root.xpath('/html/body/div[3]/div/div/div[2]/ol/li/div/h2/a')
31
+ page.root.xpath('/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a')
30
32
  rescue Exception => e
31
- debug "can't parse yandex:" + e.message
33
+ debug "can't parse yandex: #{e.message}"
32
34
  'error'
33
35
  end
34
36
  end
@@ -1,9 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  class Seotracker::Yandex::Direct < Seotracker::Yandex
4
- def special(word)
4
+ def special(word, region = MOSCOW)
5
5
  get_cookie
6
- url = SEARCH_URL + "text=#{word}&p=0"
6
+ url = SEARCH_URL + "text=#{word}&p=0&lr=#{region}"
7
7
  page = @agent.get(url, [], nil, {'cookie' => @cookie})
8
8
  begin
9
9
  texts = page.root.xpath('/html/body/div[3]/div/div/div/div/div[2]/div/h2/a')
@@ -21,7 +21,7 @@ class Seotracker::Yandex::Direct < Seotracker::Yandex
21
21
 
22
22
  result
23
23
  rescue Exception => e
24
- debug "can't parse yandex direct:" + e.message
24
+ debug "can't parse yandex direct: #{e.message}"
25
25
  'error'
26
26
  end
27
27
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "seotracker"
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["krivich ekaterina"]
12
- s.date = "2012-04-08"
12
+ s.date = "2012-04-11"
13
13
  s.description = "track sites position in google or yandex by keyword"
14
14
  s.email = "kiote_the_one@mail.ru"
15
15
  s.extra_rdoc_files = [
@@ -4,7 +4,7 @@ require 'spec_helper'
4
4
 
5
5
  def yandex_mocker(mock)
6
6
  mock.expect(:get, mock, ['http://kiks.yandex.ru/su/'])
7
- mock.expect(:get, mock, [Seotracker::Yandex::SEARCH_URL + "text=#{@word}&p=0", [], nil, {'cookie' => 'hi'}])
7
+ mock.expect(:get, mock, [Seotracker::Yandex::SEARCH_URL + "text=#{@word}&p=0&lr=213", [], nil, {'cookie' => 'hi'}])
8
8
  mock.expect(:cookies, ['hi'])
9
9
  end
10
10
 
@@ -13,6 +13,7 @@ def common_mocker
13
13
  mock.expect(:root, mock)
14
14
  mock.expect(:attribute, mock, %w/href/)
15
15
  mock.expect(:value, "http://#{@site}")
16
+ mock.expect(:content, 'yandex.ru')
16
17
  end
17
18
 
18
19
  describe Seotracker do
@@ -32,7 +33,7 @@ describe Seotracker do
32
33
  # мокаем все неважное
33
34
  mock = common_mocker
34
35
  yandex_mocker(mock)
35
- mock.expect(:xpath, [mock], %w\/html/body/div[3]/div/div/div[2]/ol/li/div/h2/a\)
36
+ mock.expect(:xpath, [mock], %w\/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a\)
36
37
 
37
38
  @object.instance_variable_set(:@agent, mock)
38
39
  end
@@ -69,7 +70,6 @@ describe Seotracker::Yandex::Direct do
69
70
  yandex_mocker(mock)
70
71
  mock.expect(:xpath, [mock, mock, mock], %w\/html/body/div[3]/div/div/div/div/div[2]/div/h2/a\)
71
72
  mock.expect(:xpath, [mock, mock, mock], %w\/html/body/div[3]/div/div/div/div/div/div/div/div/span\)
72
- mock.expect(:content, 'yandex.ru')
73
73
 
74
74
  @client.instance_variable_set(:@agent, mock)
75
75
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seotracker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-08 00:00:00.000000000 Z
12
+ date: 2012-04-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest
16
- requirement: &70224553771600 !ruby/object:Gem::Requirement
16
+ requirement: &70221806624540 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70224553771600
24
+ version_requirements: *70221806624540
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: jeweler
27
- requirement: &70224553771100 !ruby/object:Gem::Requirement
27
+ requirement: &70221806623740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70224553771100
35
+ version_requirements: *70221806623740
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: pry
38
- requirement: &70224553770340 !ruby/object:Gem::Requirement
38
+ requirement: &70221806622960 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70224553770340
46
+ version_requirements: *70221806622960
47
47
  description: track sites position in google or yandex by keyword
48
48
  email: kiote_the_one@mail.ru
49
49
  executables: []
@@ -80,7 +80,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
80
80
  version: '0'
81
81
  segments:
82
82
  - 0
83
- hash: -150072496354741973
83
+ hash: -3070310031504885579
84
84
  required_rubygems_version: !ruby/object:Gem::Requirement
85
85
  none: false
86
86
  requirements: