musicscrape 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/musicscrape +4 -0
- data/lib/musicscrape.rb +149 -0
- metadata +48 -0
data/bin/musicscrape
ADDED
data/lib/musicscrape.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'rubygems'
require 'rest-client'
require 'gcal4ruby'
|
3
|
+
# This gem fetches www.thestranger.com/seattle/Music and scrapes the data for all the recommended concerts.
# The intent is to add them to your Google Calendar, but that part has not been finished yet.
|
5
|
+
|
6
|
+
|
7
|
+
# Scrapes the "Recommended Music Events" section of The Stranger's music page
# into plain hashes of strings.
#
# The page this was written against is http://www.thestranger.com/seattle/Music;
# a saved copy (stranger.txt, next to this file) can be used instead of the
# live site by passing any source type other than :web.
class Scrape
  # Heading that opens the recommended-events section.
  SECTION_HEADING = "<h2 class=\"sitesection\">Recommended Music Events</h2>"
  # The section text is cut at the first occurrence of this marker
  # (the marker itself is kept at the end of the result).
  SECTION_END = "<li class="
  # An event is considered present while this URL prefix is still in the text.
  EVENT_URL = "http://www.thestranger.com/seattle/Event?event="
  # Matches "event=" followed by a word character, through the end of that line.
  EVENT_LINK = /\bevent=\b\d*.*/
  # Fields to extract, in page order, with the text that terminates each one.
  FIELD_DELIMITERS = [[:title, '</a><br/>'], [:venue, '<br />'], [:date, '<br />'], [:details, '</li>']].freeze

  # source_type - :web to fetch +url+ with RestClient; anything else reads
  #               the local stranger.txt file.
  # url         - address of the page to fetch when source_type is :web.
  def initialize(source_type, url)
    @source_type = source_type
    @url = url
  end

  # Returns the raw page, either fetched from @url or read from the local copy.
  def load_page
    return RestClient.get(@url) if @source_type == :web

    open_local
  end

  # Trims +text_in+ down to the recommended-events section: everything from
  # the section heading up to and including the first "<li class=" after it,
  # with surrounding whitespace removed. Returns "" when the heading is absent.
  def remove_excess(text_in)
    from_heading = text_in.partition(SECTION_HEADING).drop(1).join
    from_heading.partition(SECTION_END)[0..1].join.strip
  end

  # Returns the contents of stranger.txt located in this file's directory.
  def open_local
    File.read("#{File.dirname(__FILE__)}/stranger.txt")
  end

  # Extracts the first event's title and venue from +page+.
  #
  # Returns a hash with :title and :venue, both stripped of surrounding
  # whitespace. The unparsed remainder is left in @music_text.
  def get_event(page)
    # Drop everything up to the end of the line holding the event link.
    @music_text = page.partition(EVENT_LINK)[2]
    event_hash = take_fields(FIELD_DELIMITERS.first(2))
    @music_text = @music_text.strip
    event_hash
  end

  # Placeholder for single-field extraction; not implemented, returns nil.
  def get_field(id)
  end

  # Extracts every event found in +text_in+.
  #
  # Returns an array of hashes with :title, :venue, :date and :details.
  # The words of :details are joined with commas. Stops early, without
  # emitting a blank event, if the event URL is present but no parsable
  # event link follows it.
  def get_events(text_in)
    @music_text = text_in
    event_array = []
    while @music_text.include?(EVENT_URL)
      _, link, rest = @music_text.partition(EVENT_LINK)
      break if link.empty?

      @music_text = rest
      event_hash = take_fields(FIELD_DELIMITERS)
      event_hash[:details] = event_hash[:details].split.join(",")
      event_array << event_hash
    end
    event_array
  end

  # Counts occurrences of class="recommend_list" in +s_s+.
  #
  # Returns a hash (default value 0) keyed by the matched text; empty when
  # there are no matches.
  def count_words(s_s)
    h = Hash.new(0)
    s_s.scan("class=\"recommend_list\"") { |w| h[w] += 1 }
    h
  end

  private

  # Consumes each [field, delimiter] pair from the front of @music_text:
  # the text before the delimiter (stripped) becomes the field value and
  # the text after it becomes the new @music_text.
  def take_fields(delimiters)
    delimiters.each_with_object({}) do |(field, delimiter), event_hash|
      value, _, @music_text = @music_text.partition(delimiter)
      event_hash[field] = value.strip
    end
  end
end
|
87
|
+
# Manual smoke test for Scrape using the local-file source.
class Tester
  # Loads the page through a Scrape built with the :local source type,
  # trims it with remove_excess, and returns whatever Scrape#get_event
  # produces for the trimmed text.
  def testmethod
    scrape = Scrape.new(:local, "x") # the url is not read on the non-:web path
    page = scrape.load_page
    scrape.get_event(scrape.remove_excess(page))
  end
end
|
95
|
+
|
96
|
+
# Turns scraped event data into Google Calendar entries through GCal4Ruby.
#
# The gcal4ruby library is required at the top of the file; this class only
# references it inside create_event.
class Google
  attr_writer :password, :user, :title, :venue, :date, :details

  # First three letters of a weekday name => Time#wday number.
  DAY_NUMBERS = {
    "Sun" => 0, "Mon" => 1, "Tue" => 2, "Wed" => 3, "Thu" => 4, "Fri" => 5, "Sat" => 6
  }.freeze
  # First three letters of a month name => month number.
  MONTH_NUMBERS = {
    "Jan" => 1, "Feb" => 2, "Mar" => 3, "Apr" => 4, "May" => 5, "Jun" => 6,
    "Jul" => 7, "Aug" => 8, "Sep" => 9, "Oct" => 10, "Nov" => 11, "Dec" => 12
  }.freeze
  SECONDS_PER_DAY = 60 * 60 * 24

  # Converts a scraped date string into a Time.
  #
  # event_date - either three words, read as "Day Month Date" (e.g.
  #              "Tue Dec 11"), or any other shape, read as "Every Day"
  #              (e.g. "Every Mon") where only the second word is used.
  # now        - reference time; defaults to the current time.
  #
  # Day and month names are matched on their first three letters, ignoring
  # case, so "Tue"/"Tues" and "Thu"/"Thurs" are all accepted.
  #
  # Returns midnight local time on the given month/date in the year of +now+
  # for the three-word form. For the recurring form, returns +now+ advanced
  # by whole days to the next occurrence of that weekday (0 days if it is
  # today), i.e. only this week's occurrence.
  #
  # Raises ArgumentError if the month or weekday is not recognised.
  def convert(event_date, now = Time.now)
    words = event_date.to_s.split(" ")
    if words.size == 3
      month = number_for(MONTH_NUMBERS, words[1], "month")
      Time.local(now.year, month, words[2].to_i)
    else
      wday = number_for(DAY_NUMBERS, words[1], "weekday")
      # Modulo 7 wraps a weekday earlier than today round to next week.
      now + SECONDS_PER_DAY * ((wday - now.wday) % 7)
    end
  end

  # Creates a one-hour calendar event starting at the converted +date+.
  #
  # title, venue - used for the event title and location; when nil, the
  #                values set through title= / venue= are used instead.
  # date         - scraped date string, see #convert.
  # details      - accepted for interface compatibility; not sent yet.
  #
  # Authenticates with the values set through user= / password=, falling
  # back to the placeholder credentials when they are unset.
  def create_event(title, venue, date, details)
    @date = convert(date)
    serv = GCal4Ruby::Service.new
    serv.authenticate(@user || "fakeemail@gmail.com", @password || "fakepassword")
    calendar = GCal4Ruby::Calendar.new(serv)
    event = GCal4Ruby::Event.new(calendar)
    event.title = title || @title
    event.start_time = @date
    event.end_time = @date + 60 * 60 # add an hour for the end time
    event.where = venue || @venue
    # `send` with no arguments is Object#send and raises ArgumentError;
    # persisting the event goes through `save`.
    event.save
  end

  private

  # Looks up +word+ in +table+ by its first three letters, case-insensitively.
  # Raises ArgumentError naming +kind+ when there is no entry.
  def number_for(table, word, kind)
    key = word.to_s[0, 3].capitalize
    table.fetch(key) do
      raise ArgumentError, "unrecognized #{kind} in event date: #{word.inspect}"
    end
  end
end
|
147
|
+
|
148
|
+
# Empty placeholder class; it defines no behaviour yet.
class GetUserInfo
end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: musicscrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- tallbryan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-11 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Goes to The Stranger's music page and parses the recommended music events
|
15
|
+
for the week
|
16
|
+
email: fakeemail@fakedomain.com
|
17
|
+
executables:
|
18
|
+
- musicscrape
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/musicscrape.rb
|
23
|
+
- bin/musicscrape
|
24
|
+
homepage: http://rubygems.org/gems/musicscrape
|
25
|
+
licenses: []
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 1.8.24
|
45
|
+
signing_key:
|
46
|
+
specification_version: 3
|
47
|
+
summary: Pull recommended music events off Seattle's The Stranger website
|
48
|
+
test_files: []
|