seotracker 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source :rubygems
2
2
 
3
- group :runime do
3
+ group :runtime do
4
4
  gem 'mechanize'
5
5
  end
6
6
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.2
1
+ 0.3.0
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
3
  require 'logger'
4
- #require 'pry'
4
+ require 'pry'
5
5
 
6
6
  class Seotracker
7
7
  USER_AGENT = 'Mac Safari'
@@ -21,17 +21,25 @@ class Seotracker
21
21
  end
22
22
  end
23
23
 
24
+ # получаем позиции
25
+ # получаем массив ссылок от парсера
26
+ # увеличиваем счетчик позиций, пока не найдем нужную ссылку
27
+ # 4 ссылки храним в массиве "последних", чтобы не считать случайно выдранные парсером повторения
24
28
  def get_position(site, word, region = Seotracker::Yandex::MOSCOW, pages = 200)
25
- pos, found, start = 0, false, 0
29
+ pos, found, start, hrefs = 0, false, 0, []
26
30
  while (start < pages) && !found
27
31
  links = parse(word, start, region)
28
32
  start += RESULTS
29
33
 
30
34
  break if links == 'error'
31
35
  links.each do |l|
32
- href = l.attribute('href').value.downcase
33
- # убеждаемся, что это точно ссылка. а то бывает еще адрес
34
- pos += 1 if l.content.match(/.+\..+/)
36
+ href = get_link(l)
37
+ next if href == '' || hrefs.include?(href)
38
+
39
+ # храним 4 последние полученные ссылки
40
+ hrefs = hrefs.pop(3)
41
+ hrefs << href
42
+ pos += 1
35
43
  if href.rindex(site) && same_level(href, site)
36
44
  found = true
37
45
  break
@@ -55,6 +63,12 @@ class Seotracker
55
63
  return href.count('.') - 1 == site.count('.') if href.index('www')
56
64
  href.count('.') == site.count('.')
57
65
  end
66
+
67
# Extracts the host-like fragment of a parsed search-result link.
#
# The text is taken from the node's `href` attribute when it has one, and
# from the node's own `value` otherwise (the fallback covers objects that
# are already attribute-like). The text is lower-cased, then the first run
# of letters, digits, hyphens and dots that is followed by a dot and word
# characters is returned.
#
# @param link [#attribute, #value] parsed node or attribute object
# @return [String] the matched fragment, or '' when there is no usable
#   text or nothing matches; get_position skips '' results
def get_link(link)
  # Look the attribute up once instead of twice.
  source = link.attribute('href') if link.respond_to?(:attribute)
  source ||= link

  # A nil value used to raise NoMethodError on `nil.downcase`; treat it as
  # "no link" so one odd node does not abort the whole position scan.
  raw = source.value if source.respond_to?(:value)

  raw.to_s.downcase[/[a-zA-Z0-9\-\.]+\.\w*/].to_s
end
58
72
  end
59
73
 
60
74
  require 'seotracker/yandex'
@@ -28,8 +28,10 @@ class Seotracker::Yandex < Seotracker
28
28
  url = SEARCH_URL + "text=#{word}&p=#{start}&lr=#{region}"
29
29
  page = @agent.get(url, [], nil, { 'cookie' => @cookie })
30
30
  begin
31
- page.root.xpath('/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a')
31
+ elements = page.root.xpath('/html/body/div[2]/div/div/div/ol/li/div/div')
32
+ elements.map { |e| e.children.map { |c1| c1.children.map { |c2| c2.children } } }.flatten.compact.map { |e1| e1.attribute('href') }.flatten.compact
32
33
  rescue Exception => e
34
+ p e.message
33
35
  debug "can't parse yandex: #{e.message}"
34
36
  'error'
35
37
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "seotracker"
8
- s.version = "0.2.2"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["krivich ekaterina"]
12
- s.date = "2012-04-11"
12
+ s.date = "2012-04-12"
13
13
  s.description = "track sites position in google or yandex by keyword"
14
14
  s.email = "kiote_the_one@mail.ru"
15
15
  s.extra_rdoc_files = [
@@ -41,15 +41,18 @@ Gem::Specification.new do |s|
41
41
  s.specification_version = 3
42
42
 
43
43
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
44
45
  s.add_development_dependency(%q<minitest>, [">= 0"])
45
46
  s.add_development_dependency(%q<jeweler>, [">= 0"])
46
47
  s.add_development_dependency(%q<pry>, [">= 0"])
47
48
  else
49
+ s.add_dependency(%q<mechanize>, [">= 0"])
48
50
  s.add_dependency(%q<minitest>, [">= 0"])
49
51
  s.add_dependency(%q<jeweler>, [">= 0"])
50
52
  s.add_dependency(%q<pry>, [">= 0"])
51
53
  end
52
54
  else
55
+ s.add_dependency(%q<mechanize>, [">= 0"])
53
56
  s.add_dependency(%q<minitest>, [">= 0"])
54
57
  s.add_dependency(%q<jeweler>, [">= 0"])
55
58
  s.add_dependency(%q<pry>, [">= 0"])
@@ -6,6 +6,8 @@ def yandex_mocker(mock)
6
6
  mock.expect(:get, mock, ['http://kiks.yandex.ru/su/'])
7
7
  mock.expect(:get, mock, [Seotracker::Yandex::SEARCH_URL + "text=#{@word}&p=0&lr=213", [], nil, {'cookie' => 'hi'}])
8
8
  mock.expect(:cookies, ['hi'])
9
+ mock.expect(:children, mock)
10
+ mock.expect(:map, mock)
9
11
  end
10
12
 
11
13
  def common_mocker
@@ -33,7 +35,7 @@ describe Seotracker do
33
35
  # мокаем все неважное
34
36
  mock = common_mocker
35
37
  yandex_mocker(mock)
36
- mock.expect(:xpath, [mock], %w\/html/body/div[2]/div/div/div/ol/li/div/div/span/span/a\)
38
+ mock.expect(:xpath, [mock], %w\/html/body/div[2]/div/div/div/ol/li/div/div\)
37
39
 
38
40
  @object.instance_variable_set(:@agent, mock)
39
41
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seotracker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-11 00:00:00.000000000 Z
12
+ date: 2012-04-12 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: &70096863365260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70096863365260
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: minitest
16
- requirement: &70221806624540 !ruby/object:Gem::Requirement
27
+ requirement: &70096863364780 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ! '>='
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: '0'
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *70221806624540
35
+ version_requirements: *70096863364780
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: jeweler
27
- requirement: &70221806623740 !ruby/object:Gem::Requirement
38
+ requirement: &70096863364100 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ! '>='
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: '0'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70221806623740
46
+ version_requirements: *70096863364100
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: pry
38
- requirement: &70221806622960 !ruby/object:Gem::Requirement
49
+ requirement: &70096863363500 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ! '>='
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: '0'
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70221806622960
57
+ version_requirements: *70096863363500
47
58
  description: track sites position in google or yandex by keyword
48
59
  email: kiote_the_one@mail.ru
49
60
  executables: []
@@ -80,7 +91,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
80
91
  version: '0'
81
92
  segments:
82
93
  - 0
83
- hash: -3070310031504885579
94
+ hash: 1784715576718976177
84
95
  required_rubygems_version: !ruby/object:Gem::Requirement
85
96
  none: false
86
97
  requirements: