musicscrape 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/musicscrape.rb +6 -45
  2. metadata +50 -2
data/lib/musicscrape.rb CHANGED
@@ -5,13 +5,15 @@ require 'rest-client'
5
5
 
6
6
 
7
7
  class Scrape
8
- #@url='http://www.thestranger.com/seattle/Music' #The url for the web page we want
8
+ #Scrape is meant to parse www.thestranger.com/music and pull out info on the recommended shows
9
+ #initialize with source_type = :web and url = "http://www.thestranger.com/music"
9
10
  def initialize(source_type,url)
10
11
  @source_type = source_type
11
12
  @url = url
12
13
  end
13
14
 
14
15
  def load_page
16
+ #Uses RestClient to get the html
15
17
  if @source_type == :web then
16
18
  (RestClient.get(@url))
17
19
  else
@@ -21,40 +23,18 @@ class Scrape
21
23
 
22
24
  def remove_excess(text_in)
23
25
  #removes everything except the recommended events section
24
- #text_in = text_in.partition("class=\"recommend_list\"").drop(1).join
25
26
  text_in = text_in.partition("<h2 class=\"sitesection\">Recommended Music Events</h2>").drop(1).join
26
27
  text_in.partition("<li class=")[0..1].join.strip
27
28
  end
28
29
 
29
- #puts (RestClient.get(@path))
30
30
  def open_local
31
+ #opens a local file for testing
31
32
  open("#{File.dirname(__FILE__)}/stranger.txt", &:read)
32
33
  end
33
34
 
34
- def get_event(page)
35
- puts page
36
- @music_text = page
37
- @music_text = @music_text.partition(/\bevent=\b\d*.*/)[2]
38
-
39
- my_array = @music_text.partition('</a><br/>')
40
- #use the first item of the array minus leading and trailing white space as event title
41
- event_hash = {}
42
- event_hash[:title] = my_array[0]
43
- @music_text = my_array[2].strip
44
-
45
- #now partition it at <br /> and the first element is venue
46
- my_array = @music_text.partition('<br />')
47
- event_hash[:venue] = my_array[0]
48
- @music_text = my_array[2].strip
49
-
50
- event_hash
51
- end
52
- def get_field(id)
53
- #gets one field from the page
54
-
55
-
56
- end
57
35
  def get_events(text_in)
36
+ #parses the html after it has been pared down and returns an array of hashes containing the data
37
+ #returns [{:title=> "", :venue=>"", :date=>"", :details=>""}, ...]
58
38
  @music_text = text_in
59
39
  #while there are still events left do this
60
40
  event_array = []
@@ -75,22 +55,6 @@ class Scrape
75
55
  event_array
76
56
  end
77
57
 
78
- def count_words(s_s)
79
- h = Hash.new(0)
80
- s_s.scan("class=\"recommend_list\"") do |w|
81
- h[w] += 1
82
- end
83
- h
84
- end
85
-
86
- end
87
- class Tester
88
- def testmethod
89
- scrape = Scrape.new(:local,"x")
90
- x = scrape.load_page
91
- x = scrape.remove_excess(x)
92
- scrape.get_event(x)
93
- end
94
58
  end
95
59
 
96
60
  class Google
@@ -144,6 +108,3 @@ class Google
144
108
  end
145
109
 
146
110
  end
147
-
148
- class GetUserInfo
149
- end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: musicscrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,55 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
  date: 2012-12-11 00:00:00.000000000 Z
13
- dependencies: []
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: restclient
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: gcal4ruby
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rubygems
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
14
62
  description: Goes to The Stranger's music page and parses the recommended music events
15
63
  for the week
16
64
  email: fakeemail@fakedomain.com