seotracker 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source :rubygems
2
2
 
3
- group :runime do
3
+ group :runtime do
4
4
  gem 'mechanize'
5
5
  end
6
6
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.2
1
+ 0.3.0
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
3
  require 'logger'
4
- #require 'pry'
4
+ require 'pry'
5
5
 
6
6
  class Seotracker
7
7
  USER_AGENT = 'Mac Safari'
@@ -21,17 +21,25 @@ class Seotracker
21
21
  end
22
22
  end
23
23
 
24
+ # получаем позиции
25
+ # получаем массив ссылок от парсера
26
+ # увеличиваем счетчик позиций, пока не найдем нужную ссылку
27
+ # 4 ссылки храним в массиве "последних", чтобы не считать случайно выдранные парсером повторения
24
28
  def get_position(site, word, region = Seotracker::Yandex::MOSCOW, pages = 200)
25
- pos, found, start = 0, false, 0
29
+ pos, found, start, hrefs = 0, false, 0, []
26
30
  while (start < pages) && !found
27
31
  links = parse(word, start, region)
28
32
  start += RESULTS
29
33
 
30
34
  break if links == 'error'
31
35
  links.each do |l|
32
- href = l.attribute('href').value.downcase
33
- # убеждаемся, что это точно ссылка. а то бывает еще адрес
34
- pos += 1 if l.content.match(/.+\..+/)
36
+ href = get_link(l)
37
+ next if href == '' || hrefs.include?(href)
38
+
39
+ # храним 4 последние полученные ссылки
40
+ hrefs = hrefs.pop(3)
41
+ hrefs << href
42
+ pos += 1
35
43
  if href.rindex(site) && same_level(href, site)
36
44
  found = true
37
45
  break
@@ -55,6 +63,12 @@ class Seotracker
55
63
  return href.count('.') - 1 == site.count('.') if href.index('www')
56
64
  href.count('.') == site.count('.')
57
65
  end
66
+
67
# Extracts the host-like fragment from a search-result link.
#
# Uses the node's `href` attribute when it has one; otherwise falls back to
# the node's own `value` (e.g. when the parser already handed back attribute
# nodes). The text is lowercased and the first run of letters, digits,
# hyphens and dots that contains a dot is returned.
#
# @param link [#attribute, #value] element or attribute node from the parser
# @return [String] the matched fragment, or '' when nothing usable is found
def get_link(link)
  # Look the attribute up once; not every node type exposes #attribute.
  href_attr = link.attribute('href') if link.respond_to?(:attribute)
  raw = href_attr.value if href_attr
  raw ||= link.value if link.respond_to?(:value)

  # No href and no value: return '' so the caller can skip this entry
  # instead of failing on nil.downcase.
  return '' if raw.nil?

  raw.to_s.downcase.match(/[a-zA-Z0-9\-\.]+\.\w*/).to_s
end
58
72
  end
59
73
 
60
74
  require 'seotracker/yandex'
@@ -28,8 +28,10 @@ class Seotracker::Yandex < Seotracker
28
28
  url = SEARCH_URL + "text=#{word}&p=#{start}&lr=#{region}"
29
29
  page = @agent.get(url, [], nil, { 'cookie' => @cookie })
30
30
  begin
31
- page.root.xpath('/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a')
31
+ elements = page.root.xpath('/html/body/div[2]/div/div/div/ol/li/div/div')
32
+ elements.map { |e| e.children.map { |c1| c1.children.map { |c2| c2.children } } }.flatten.compact.map { |e1| e1.attribute('href') }.flatten.compact
32
33
  rescue Exception => e
34
+ p e.message
33
35
  debug "can't parse yandex: #{e.message}"
34
36
  'error'
35
37
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "seotracker"
8
- s.version = "0.2.2"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["krivich ekaterina"]
12
- s.date = "2012-04-11"
12
+ s.date = "2012-04-12"
13
13
  s.description = "track sites position in google or yandex by keyword"
14
14
  s.email = "kiote_the_one@mail.ru"
15
15
  s.extra_rdoc_files = [
@@ -41,15 +41,18 @@ Gem::Specification.new do |s|
41
41
  s.specification_version = 3
42
42
 
43
43
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
44
45
  s.add_development_dependency(%q<minitest>, [">= 0"])
45
46
  s.add_development_dependency(%q<jeweler>, [">= 0"])
46
47
  s.add_development_dependency(%q<pry>, [">= 0"])
47
48
  else
49
+ s.add_dependency(%q<mechanize>, [">= 0"])
48
50
  s.add_dependency(%q<minitest>, [">= 0"])
49
51
  s.add_dependency(%q<jeweler>, [">= 0"])
50
52
  s.add_dependency(%q<pry>, [">= 0"])
51
53
  end
52
54
  else
55
+ s.add_dependency(%q<mechanize>, [">= 0"])
53
56
  s.add_dependency(%q<minitest>, [">= 0"])
54
57
  s.add_dependency(%q<jeweler>, [">= 0"])
55
58
  s.add_dependency(%q<pry>, [">= 0"])
@@ -6,6 +6,8 @@ def yandex_mocker(mock)
6
6
  mock.expect(:get, mock, ['http://kiks.yandex.ru/su/'])
7
7
  mock.expect(:get, mock, [Seotracker::Yandex::SEARCH_URL + "text=#{@word}&p=0&lr=213", [], nil, {'cookie' => 'hi'}])
8
8
  mock.expect(:cookies, ['hi'])
9
+ mock.expect(:children, mock)
10
+ mock.expect(:map, mock)
9
11
  end
10
12
 
11
13
  def common_mocker
@@ -33,7 +35,7 @@ describe Seotracker do
33
35
  # мокаем все неважное
34
36
  mock = common_mocker
35
37
  yandex_mocker(mock)
36
- mock.expect(:xpath, [mock], %w\/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a\)
38
+ mock.expect(:xpath, [mock], %w\/html/body/div[2]/div/div/div/ol/li/div/div\)
37
39
 
38
40
  @object.instance_variable_set(:@agent, mock)
39
41
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seotracker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-11 00:00:00.000000000 Z
12
+ date: 2012-04-12 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: &70096863365260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70096863365260
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: minitest
16
- requirement: &70221806624540 !ruby/object:Gem::Requirement
27
+ requirement: &70096863364780 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ! '>='
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: '0'
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *70221806624540
35
+ version_requirements: *70096863364780
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: jeweler
27
- requirement: &70221806623740 !ruby/object:Gem::Requirement
38
+ requirement: &70096863364100 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ! '>='
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: '0'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70221806623740
46
+ version_requirements: *70096863364100
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: pry
38
- requirement: &70221806622960 !ruby/object:Gem::Requirement
49
+ requirement: &70096863363500 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ! '>='
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: '0'
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70221806622960
57
+ version_requirements: *70096863363500
47
58
  description: track sites position in google or yandex by keyword
48
59
  email: kiote_the_one@mail.ru
49
60
  executables: []
@@ -80,7 +91,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
80
91
  version: '0'
81
92
  segments:
82
93
  - 0
83
- hash: -3070310031504885579
94
+ hash: 1784715576718976177
84
95
  required_rubygems_version: !ruby/object:Gem::Requirement
85
96
  none: false
86
97
  requirements: