musicscrape 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/musicscrape +4 -0
- data/lib/musicscrape.rb +149 -0
- metadata +48 -0
data/bin/musicscrape
ADDED
data/lib/musicscrape.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rest-client'
|
3
|
+
#This gem goes to www.thestranger.com/seattle/Music and scrapes data for all the recommended concerts.
|
4
|
+
#The intent is to have it put them on your google calendar but it hasn't gotten that far yet.
|
5
|
+
|
6
|
+
|
7
|
+
# Scrapes the "Recommended Music Events" section of The Stranger's Seattle
# music page (or a local copy of it) and breaks it into event hashes.
#
# Typical flow: load_page -> remove_excess -> get_events.
class Scrape
  # Heading that opens the recommended-events section of the page.
  SECTION_HEADING = "<h2 class=\"sitesection\">Recommended Music Events</h2>".freeze

  # First marker after the recommended events; everything past it is dropped.
  SECTION_END = "<li class=".freeze

  # Link prefix whose presence means at least one more event may remain.
  EVENT_LINK = "http://www.thestranger.com/seattle/Event?event=".freeze

  # Matches "event=<digits>" plus the remainder of that line ('.' stops at a
  # newline), so the text after the match starts on the following line.
  EVENT_ANCHOR = /\bevent=\b\d*.*/

  # Fields of one event, in page order, each paired with the markup that
  # terminates it.
  FIELD_DELIMITERS = [
    [:title,   '</a><br/>'],
    [:venue,   '<br />'],
    [:date,    '<br />'],
    [:details, '</li>']
  ].freeze

  # source_type - :web to fetch +url+ over HTTP; any other value reads the
  #               local stranger.txt fixture instead.
  # url         - address of the page to fetch (only used for :web).
  def initialize(source_type, url)
    @source_type = source_type
    @url = url
  end

  # Returns the raw page: the RestClient.get result for :web sources,
  # otherwise the contents of the local fixture file.
  def load_page
    if @source_type == :web
      RestClient.get(@url)
    else
      open_local
    end
  end

  # Removes everything except the recommended events section.
  #
  # Returns the text from the section heading up to and including the first
  # "<li class=" marker, stripped of surrounding whitespace. Returns "" when
  # the heading is not present in +text_in+.
  def remove_excess(text_in)
    _before, heading, rest = text_in.partition(SECTION_HEADING)
    (heading + rest).partition(SECTION_END)[0..1].join.strip
  end

  # Reads stranger.txt from the directory containing this source file.
  # Raises Errno::ENOENT if the fixture is missing.
  def open_local
    File.read("#{File.dirname(__FILE__)}/stranger.txt")
  end

  # Extracts the first event found in +page+.
  #
  # Returns a hash with :title and :venue, both stripped of surrounding
  # whitespace; the values are empty strings when no event link is found.
  def get_event(page)
    remaining = page.partition(EVENT_ANCHOR)[2]
    title, _sep, remaining = remaining.partition('</a><br/>')
    venue, _sep, _rest = remaining.strip.partition('<br />')
    { :title => title.strip, :venue => venue.strip }
  end

  # Gets one field from the page.
  # TODO: not implemented yet; currently always returns nil.
  def get_field(id)
  end

  # Extracts every event from +text_in+.
  #
  # Returns an array of hashes with :title, :venue, :date and :details.
  # Whitespace-separated tokens in :details are re-joined with commas.
  def get_events(text_in)
    remaining = text_in
    events = []
    while remaining.include?(EVENT_LINK)
      # Pull off everything up to the end of the line holding the link.
      _before, anchor, remaining = remaining.partition(EVENT_ANCHOR)
      # The link prefix was present but not followed by an event id, so
      # there is nothing to parse; stop instead of recording an empty event.
      break if anchor.empty?

      event = {}
      FIELD_DELIMITERS.each do |field, delimiter|
        value, _sep, remaining = remaining.partition(delimiter)
        event[field] = value.strip
      end
      event[:details] = event[:details].split.join(",")
      events << event
    end
    events
  end

  # Counts occurrences of the class="recommend_list" attribute in +s_s+.
  #
  # Returns a hash (default value 0) mapping that attribute text to its
  # number of occurrences; the hash is empty when there are none.
  def count_words(s_s)
    counts = Hash.new(0)
    s_s.scan("class=\"recommend_list\"") { |hit| counts[hit] += 1 }
    counts
  end
end
|
87
|
+
# Ad-hoc smoke harness that pushes a locally loaded page through the scraper.
class Tester
  # Builds a Scrape in :local mode, trims the loaded page with remove_excess
  # and returns whatever Scrape#get_event produces for the trimmed text.
  def testmethod
    scraper = Scrape.new(:local,"x")
    trimmed = scraper.remove_excess(scraper.load_page)
    scraper.get_event(trimmed)
  end
end
|
95
|
+
|
96
|
+
# Turns a scraped event into a Google Calendar entry via the gcal4ruby gem.
#
# Set +user+ and +password+ before calling create_event.
class Google
  attr_writer :password
  attr_writer :user
  attr_writer :title
  attr_writer :venue
  attr_writer :date
  attr_writer :details

  require "rubygems"
  begin
    require "gcal4ruby"
  rescue LoadError
    # Tolerated at load time so the scraper and #convert stay usable without
    # the calendar gem; create_event requires it again and will raise there.
  end

  SECONDS_PER_DAY = 60 * 60 * 24

  # A year-less date more than this far in the past is taken to mean the
  # same date next year (e.g. "Wed Jan 2" scraped in December).
  ROLLOVER_SECONDS = 180 * SECONDS_PER_DAY

  # Weekday / month numbers keyed by three-letter abbreviation. Lookups use
  # only the first three letters, so "Tues", "Thurs" and "Sept" also match.
  DAY_NUMBERS = {
    "Sun" => 0, "Mon" => 1, "Tue" => 2, "Wed" => 3,
    "Thu" => 4, "Fri" => 5, "Sat" => 6
  }.freeze
  MONTH_NUMBERS = {
    "Jan" => 1, "Feb" => 2, "Mar" => 3, "Apr" => 4, "May" => 5, "Jun" => 6,
    "Jul" => 7, "Aug" => 8, "Sep" => 9, "Oct" => 10, "Nov" => 11, "Dec" => 12
  }.freeze

  # Converts a Stranger date string to a Time.
  #
  # event_date - either three words, "Day Month Date" (e.g. "Wed Dec 12"),
  #              or a recurring form whose second word is a weekday
  #              (e.g. "Every Mon").
  # now        - reference time; defaults to the current time.
  #
  # Three-word dates resolve to local midnight of that month/day in the
  # reference year, or the following year when that would lie more than
  # ROLLOVER_SECONDS in the past. Recurring dates resolve to the next
  # occurrence of that weekday (today included) at the reference time of
  # day; only this single occurrence is produced.
  #
  # Raises ArgumentError when the month or weekday is not recognised or the
  # string has too few words.
  def convert(event_date, now = Time.now)
    words = event_date.to_s.split(" ")
    if words.size == 3
      month = lookup_abbreviation(MONTH_NUMBERS, words[1], "month", event_date)
      day_of_month = words[2].to_i
      event_time = Time.local(now.year, month, day_of_month)
      if now - event_time > ROLLOVER_SECONDS
        event_time = Time.local(now.year + 1, month, day_of_month)
      end
      event_time
    else
      if words.size < 2
        raise ArgumentError, "cannot parse event date #{event_date.inspect}"
      end
      event_wday = lookup_abbreviation(DAY_NUMBERS, words[1], "weekday", event_date)
      # Modulo 7 wraps a negative difference round to the coming week.
      now + SECONDS_PER_DAY * ((event_wday - now.wday) % 7)
    end
  end

  # Creates a one-hour calendar event.
  #
  # title, venue, details - event fields; a nil argument falls back to the
  #                         value set through the matching writer.
  # date                  - Stranger date string, parsed with #convert.
  #
  # Raises ArgumentError when user or password has not been set, and
  # LoadError when the gcal4ruby gem is not installed.
  def create_event(title, venue, date, details)
    require "gcal4ruby"
    if @user.nil? || @password.nil?
      raise ArgumentError, "user and password must be set before creating an event"
    end

    @date = convert(date)
    serv = GCal4Ruby::Service.new
    serv.authenticate @user, @password
    # NOTE(review): this builds a brand-new Calendar object rather than
    # selecting one of the account's existing calendars - confirm intent.
    calendar = GCal4Ruby::Calendar.new(serv)
    event = GCal4Ruby::Event.new(calendar)
    event.title = title || @title
    event.start_time = @date
    event.end_time = @date + 60*60 #add an hour for the end time
    event.where = venue || @venue
    event.content = details || @details
    event.save
  end

  private

  # Looks +word+ up in +table+ by its capitalised first three letters.
  # Raises ArgumentError naming +kind+ and the full +source+ string on a miss.
  def lookup_abbreviation(table, word, kind, source)
    key = word.to_s[0, 3].to_s.capitalize
    table.fetch(key) do
      raise ArgumentError, "unrecognised #{kind} #{word.inspect} in event date #{source.inspect}"
    end
  end
end
|
147
|
+
|
148
|
+
# Empty placeholder class; it defines no behaviour of its own.
class GetUserInfo; end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: musicscrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- tallbryan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-11 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Goes to The Stranger's music page and parses the recommended music events
|
15
|
+
for the week
|
16
|
+
email: fakeemail@fakedomain.com
|
17
|
+
executables:
|
18
|
+
- musicscrape
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/musicscrape.rb
|
23
|
+
- bin/musicscrape
|
24
|
+
homepage: http://rubygems.org/gems/musicscrape
|
25
|
+
licenses: []
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 1.8.24
|
45
|
+
signing_key:
|
46
|
+
specification_version: 3
|
47
|
+
summary: Pull recommended music events off Seattle's The Stranger website
|
48
|
+
test_files: []
|