gulesider 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/gulesider.rb +9 -0
- data/lib/screen_scraper.rb +52 -0
- data/lib/search_result.rb +13 -0
- metadata +58 -0
data/lib/screen_scraper.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Looks up a phone number on gulesider.no and turns the result page into a
# SearchResult.
#
# Mechanize and SearchResult are not loaded here; whoever requires this file
# is expected to have them in scope.
class ScreenScraper
  def initialize
    @gulesider = "http://www.gulesider.no/finn:"
    @agent = Mechanize.new
  end

  # Fetches the search page for +number+ and scrapes it.
  #
  # number - the phone number to look up. Anything whose #to_s is the number
  #          works (String or Integer).
  #
  # Returns a SearchResult built with :person => true when the page's
  # canonical URI points at a person page, otherwise one built with
  # :company => true.
  def search(number)
    search_webpage_for(number)

    return scraped_person_data if number_belongs_to_a_person?

    scraped_company_data
  end

  private

  # Loads the search page and remembers it in @result for the scrapers below.
  def search_webpage_for(number)
    # Interpolate rather than `+` so an Integer number does not raise TypeError.
    @result = @agent.get("#{@gulesider}#{number}")
  end

  # True when the canonical URI of the page just fetched contains the person
  # URL prefix. A missing canonical URI stringifies to "" and yields false.
  def number_belongs_to_a_person?
    @agent.current_page.canonical_uri.to_s.include?("http://www.gulesider.no/person")
  end

  # Builds the SearchResult for a person page from its div.vcard markup.
  def scraped_person_data
    vcard = @result.search("div.vcard")
    address = vcard.search("span.adr")
    given_name = vcard.search("span.given-name").text
    family_name = vcard.search("span.family-name").text

    SearchResult.new({
      :person => true,
      :name => "#{given_name} #{family_name}",
      :phone => vcard.search("li.tel:first-child").text,
      :street_name => address.search("span.street-address").text,
      :postal_code => address.search("span.postal-code").text,
      :city => address.search("span.locality").text,
      :latitude => address.search("span.latitude").text,
      :longitude => address.search("span.longitude").text
    })
  end

  # Builds the SearchResult for a company listing from the first div.hit
  # inside #result-list.
  def scraped_company_data
    hits = @result.search("#result-list").search("div.hit")
    # Scrape only the first hit; searching the whole set would concatenate the
    # text of every hit. With no hits, fall back to the empty set so each
    # field is simply "".
    first_hit = hits.first || hits

    SearchResult.new({
      :company => true,
      :name => first_hit.search("div.header").search("a").text,
      :phone => first_hit.search("li.tel:first-child").text,
      :street_name => first_hit.search("span.street-address").text,
      :postal_code => first_hit.search("span.postal-code").text,
      :city => first_hit.search("span.locality").text
    })
  end
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gulesider
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Goeran Hansen
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-02 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mechanize
|
16
|
+
requirement: &70351980572740 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.1'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70351980572740
|
25
|
+
description:
|
26
|
+
email: mail@goeran.no
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- lib/search_result.rb
|
32
|
+
- lib/screen_scraper.rb
|
33
|
+
- lib/gulesider.rb
|
34
|
+
homepage: http://github.com/goeran/gulesider
|
35
|
+
licenses: []
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.10
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Screen scraper for gulesider.no. A global Norwegian phone book
|
58
|
+
test_files: []
|