musicscrape 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/musicscrape.rb +6 -45
- metadata +50 -2
data/lib/musicscrape.rb
CHANGED
@@ -5,13 +5,15 @@ require 'rest-client'
|
|
5
5
|
|
6
6
|
|
7
7
|
class Scrape
|
8
|
-
|
8
|
+
#Scrape is meant to parse www.thestranger.com/music and pull out info on the recommended shows
|
9
|
+
#initialize with source_type = :web and url = "http://www.thestranger.com/music"
|
9
10
|
def initialize(source_type,url)
|
10
11
|
@source_type = source_type
|
11
12
|
@url = url
|
12
13
|
end
|
13
14
|
|
14
15
|
def load_page
|
16
|
+
#Uses RestClient to get the html
|
15
17
|
if @source_type == :web then
|
16
18
|
(RestClient.get(@url))
|
17
19
|
else
|
@@ -21,40 +23,18 @@ class Scrape
|
|
21
23
|
|
22
24
|
def remove_excess(text_in)
|
23
25
|
#removes everything except the recommended events section
|
24
|
-
#text_in = text_in.partition("class=\"recommend_list\"").drop(1).join
|
25
26
|
text_in = text_in.partition("<h2 class=\"sitesection\">Recommended Music Events</h2>").drop(1).join
|
26
27
|
text_in.partition("<li class=")[0..1].join.strip
|
27
28
|
end
|
28
29
|
|
29
|
-
#puts (RestClient.get(@path))
|
30
30
|
def open_local
|
31
|
+
#opens a local file for testing
|
31
32
|
open("#{File.dirname(__FILE__)}/stranger.txt", &:read)
|
32
33
|
end
|
33
34
|
|
34
|
-
def get_event(page)
|
35
|
-
puts page
|
36
|
-
@music_text = page
|
37
|
-
@music_text = @music_text.partition(/\bevent=\b\d*.*/)[2]
|
38
|
-
|
39
|
-
my_array = @music_text.partition('</a><br/>')
|
40
|
-
#use the first item of the array minus leading and trailing white space as event title
|
41
|
-
event_hash = {}
|
42
|
-
event_hash[:title] = my_array[0]
|
43
|
-
@music_text = my_array[2].strip
|
44
|
-
|
45
|
-
#now partition it at <br /> and the first element is venue
|
46
|
-
my_array = @music_text.partition('<br />')
|
47
|
-
event_hash[:venue] = my_array[0]
|
48
|
-
@music_text = my_array[2].strip
|
49
|
-
|
50
|
-
event_hash
|
51
|
-
end
|
52
|
-
def get_field(id)
|
53
|
-
#gets one field from the page
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
35
|
def get_events(text_in)
|
36
|
+
#parses the html after it has been pared down and returns an array of hashes containing the data
|
37
|
+
#returns [[:title=> "", :venue=>"", :date=>"", :details=>""], ...]
|
58
38
|
@music_text = text_in
|
59
39
|
#while there are still events left do this
|
60
40
|
event_array = []
|
@@ -75,22 +55,6 @@ class Scrape
|
|
75
55
|
event_array
|
76
56
|
end
|
77
57
|
|
78
|
-
def count_words(s_s)
|
79
|
-
h = Hash.new(0)
|
80
|
-
s_s.scan("class=\"recommend_list\"") do |w|
|
81
|
-
h[w] += 1
|
82
|
-
end
|
83
|
-
h
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
class Tester
|
88
|
-
def testmethod
|
89
|
-
scrape = Scrape.new(:local,"x")
|
90
|
-
x = scrape.load_page
|
91
|
-
x = scrape.remove_excess(x)
|
92
|
-
scrape.get_event(x)
|
93
|
-
end
|
94
58
|
end
|
95
59
|
|
96
60
|
class Google
|
@@ -144,6 +108,3 @@ class Google
|
|
144
108
|
end
|
145
109
|
|
146
110
|
end
|
147
|
-
|
148
|
-
class GetUserInfo
|
149
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: musicscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,55 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-12-11 00:00:00.000000000 Z
|
13
|
-
dependencies: []
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: restclient
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: gcal4ruby
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rubygems
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
14
62
|
description: Goes to The Stranger's music page and parses the recommended music events
|
15
63
|
for the week
|
16
64
|
email: fakeemail@fakedomain.com
|