musicscrape 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/musicscrape.rb +6 -45
- metadata +50 -2
data/lib/musicscrape.rb
CHANGED
@@ -5,13 +5,15 @@ require 'rest-client'
|
|
5
5
|
|
6
6
|
|
7
7
|
class Scrape
|
8
|
-
|
8
|
+
#Scrape is meant to parse www.thestranger.com/music and pull out info on the recommended shows
|
9
|
+
#initialize with source_type = :web and url = "http://www.thestranger.com/music"
|
9
10
|
def initialize(source_type,url)
|
10
11
|
@source_type = source_type
|
11
12
|
@url = url
|
12
13
|
end
|
13
14
|
|
14
15
|
def load_page
|
16
|
+
#Uses RestClient to get the html
|
15
17
|
if @source_type == :web then
|
16
18
|
(RestClient.get(@url))
|
17
19
|
else
|
@@ -21,40 +23,18 @@ class Scrape
|
|
21
23
|
|
22
24
|
def remove_excess(text_in)
|
23
25
|
#removes everthing except the recommended events section
|
24
|
-
#text_in = text_in.partition("class=\"recommend_list\"").drop(1).join
|
25
26
|
text_in = text_in.partition("<h2 class=\"sitesection\">Recommended Music Events</h2>").drop(1).join
|
26
27
|
text_in.partition("<li class=")[0..1].join.strip
|
27
28
|
end
|
28
29
|
|
29
|
-
#puts (RestClient.get(@path))
|
30
30
|
def open_local
|
31
|
+
#opens a local file for testing
|
31
32
|
open("#{File.dirname(__FILE__)}/stranger.txt", &:read)
|
32
33
|
end
|
33
34
|
|
34
|
-
def get_event(page)
|
35
|
-
puts page
|
36
|
-
@music_text = page
|
37
|
-
@music_text = @music_text.partition(/\bevent=\b\d*.*/)[2]
|
38
|
-
|
39
|
-
my_array = @music_text.partition('</a><br/>')
|
40
|
-
#use the first item of the array minus leading and trailing white space as event title
|
41
|
-
event_hash = {}
|
42
|
-
event_hash[:title] = my_array[0]
|
43
|
-
@music_text = my_array[2].strip
|
44
|
-
|
45
|
-
#now partition it at <br /> and the first element is venue
|
46
|
-
my_array = @music_text.partition('<br />')
|
47
|
-
event_hash[:venue] = my_array[0]
|
48
|
-
@music_text = my_array[2].strip
|
49
|
-
|
50
|
-
event_hash
|
51
|
-
end
|
52
|
-
def get_field(id)
|
53
|
-
#gets one field from the page
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
35
|
def get_events(text_in)
|
36
|
+
#parses the html after it has been pared down and returns an array of hashes containing the data
|
37
|
+
#returns [[:title=> "", :venue=>"", :date=>"", :details=>""], ...]
|
58
38
|
@music_text = text_in
|
59
39
|
#while there are still events left do this
|
60
40
|
event_array = []
|
@@ -75,22 +55,6 @@ class Scrape
|
|
75
55
|
event_array
|
76
56
|
end
|
77
57
|
|
78
|
-
def count_words(s_s)
|
79
|
-
h = Hash.new(0)
|
80
|
-
s_s.scan("class=\"recommend_list\"") do |w|
|
81
|
-
h[w] += 1
|
82
|
-
end
|
83
|
-
h
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
class Tester
|
88
|
-
def testmethod
|
89
|
-
scrape = Scrape.new(:local,"x")
|
90
|
-
x = scrape.load_page
|
91
|
-
x = scrape.remove_excess(x)
|
92
|
-
scrape.get_event(x)
|
93
|
-
end
|
94
58
|
end
|
95
59
|
|
96
60
|
class Google
|
@@ -144,6 +108,3 @@ class Google
|
|
144
108
|
end
|
145
109
|
|
146
110
|
end
|
147
|
-
|
148
|
-
class GetUserInfo
|
149
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: musicscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,55 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-12-11 00:00:00.000000000 Z
|
13
|
-
dependencies:
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: restclient
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: gcal4ruby
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rubygems
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
14
62
|
description: Goes to The Stranger's music page and parses the recommended music events
|
15
63
|
for the week
|
16
64
|
email: fakeemail@fakedomain.com
|