metade-link_toad 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,48 @@
1
+ = link_toad
2
+
3
+ * http://github.com/metade/link_toad
4
+
5
+ == DESCRIPTION:
6
+
7
+ LinkToad is a general purpose equivalency engine that uses hyperlinks.
8
+
9
+ == USAGE:
10
+
11
+ require 'rubygems'
12
+ require 'link_toad'
13
+
14
+ # mapping of artist-related URLs to MusicBrainz artist identifier
15
+ mapping = {
16
+ 'http://www.arcticmonkeys.com/' => 'ada7a83c-e3e1-40f1-93f9-3e73dbc9298a',
17
+ }
18
+ toad = LinkToad.new(mapping)
19
+
20
+ # match a news story linking to the Arctic Monkeys homepage
21
+ toad.match('http://news.bbc.co.uk/1/hi/entertainment/7664082.stm')
22
+ => ["ada7a83c-e3e1-40f1-93f9-3e73dbc9298a"]
23
+
24
+ See +examples/+ for more details.
25
+
26
+ == LICENSE:
27
+
28
+ The MIT License
29
+
30
+ Copyright (c) 2008 Patrick Sinclair
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining a copy
33
+ of this software and associated documentation files (the "Software"), to deal
34
+ in the Software without restriction, including without limitation the rights
35
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
36
+ copies of the Software, and to permit persons to whom the Software is
37
+ furnished to do so, subject to the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be included in
40
+ all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
43
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
44
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
45
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
46
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
47
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
48
+ THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ These scripts enable news items on the Guardian RSS feed to be associated with a particular MusicBrainz artist.
2
+
3
+ guardian-musicbrainz.rb
4
+ -----------------------
5
+
6
+ * pulls in the full list of music-related tags from the Guardian web site
7
+ * for each sensible-looking tag it searches MusicBrainz for a matching artist
8
+ * it outputs a YAML file mapping Guardian tag URLs to artist GUIDs
9
+
10
+ guardian-music-news.rb
11
+ ----------------------
12
+
13
+ Uses the mapping produced by +guardian-musicbrainz.rb+ to associate stories from the Guardian Music RSS feed with MusicBrainz artists.
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'rss/2.0'
3
+ require 'yaml'
4
+ require '../../../lib/link_toad.rb'
5
+
6
# Build a LinkToad from the Guardian-tag-URL => artist-id mapping stored in
# guardian-musicbrainz.yml (read from the current working directory).
guardian_to_artist = YAML.load_file('guardian-musicbrainz.yml')
toad = LinkToad.new(guardian_to_artist)

# Fetch the Guardian music feed and, for every story, print the story URL
# followed by the identifiers LinkToad found for it.
rss = RSS::Parser.parse(open('http://www.guardian.co.uk/music/rss'))
rss.items.each do |story|
  story_url = story.guid.content
  puts "#{story_url}: #{toad.match(story_url).inspect}"
end
15
+
@@ -0,0 +1,40 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'rbrainz'
4
+ require 'open-uri'
5
+ require 'yaml'
6
+
7
# Guardian keyword pages that are not artists and must not be looked up.
ignore_list = [
  # guardian specific
  /alternativetop100albums/, /bobmarley60thanniversary/,
  # events
  /britawards/, /eurovision/, /glastonbury/, /live8/, /meltdownfestival/,
  /mercuryprize/, /o2wirelessweekenders/, /proms/, /readingandleedsfestival/,
  # genres
  /classicalmusicandopera/, /downloads/, /electronicmusic/, /folk/,
  /jazz/, /popandrock/, /worldmusic/
]

# Only single-segment keyword pages under /music/ are considered.
# The dots are escaped so they match literal dots only.
keyword_page = %r{http://www\.guardian\.co\.uk/music/(\w+)$}

# The block form of File.open closes (and therefore flushes) the output file
# even when a network request below raises part-way through the run.
File.open('guardian-musicbrainz.yml', 'w') do |file|
  file.puts('---')

  urls = []
  q = MusicBrainz::Webservice::Query.new
  doc = Hpricot(open('http://www.guardian.co.uk/music/list/allmusickeywords'))
  doc.search('//a').each do |link|
    url = link.attributes['href']
    next if url.nil?
    next if urls.include?(url) # each keyword page is looked up once
    next unless url =~ keyword_page
    next if ignore_list.any? { |re| url =~ re }
    urls << url

    # The link text is used as the artist search query.
    name = link.inner_html.strip
    results = q.get_artists(MusicBrainz::Webservice::ArtistFilter.new(:query => name))
    sleep 1 # don't hit MusicBrainz too hard

    # Take the first search result; its name is kept as a trailing YAML
    # comment so the generated mapping can be checked by eye.
    if results.size > 0
      m_name, gid = results[0].entity.name, results[0].entity.id.uuid
      file.puts("#{url}: #{gid} ##{m_name}")
    end
  end
end
@@ -0,0 +1,245 @@
1
+ ---
2
+ http://www.guardian.co.uk/music/2pac: 382f1005-e9ab-4684-afd4-0bdae4ee37f2 #2Pac
3
+ http://www.guardian.co.uk/music/50cent: 6e3db4ff-39cc-4675-8310-5ace0cd63bfa #50 Cent & Eminem
4
+ http://www.guardian.co.uk/music/aaliyah: bc85da58-52d9-457d-ae8d-5d8d4ec870a9 #Aaliyah
5
+ http://www.guardian.co.uk/music/abba: d87e52c5-bb8d-4da8-b941-9f4928627dc8 #ABBA
6
+ http://www.guardian.co.uk/music/ryanadams: c80f38a6-9980-485d-997c-5c1a9cbd0d64 #Ryan Adams
7
+ http://www.guardian.co.uk/music/christinaaguilera: b202beb7-99bd-47e7-8b72-195c8d72ebdd #Christina Aguilera
8
+ http://www.guardian.co.uk/music/akon: 1138a764-2212-4d0a-b02d-0dc14df91e08 #Akon
9
+ http://www.guardian.co.uk/music/alltomorrowsparties: 1771f952-52c9-4de4-8c65-8ca95f932652 #All Tomorrow's Party
10
+ http://www.guardian.co.uk/music/lilyallen: 6e0c7c0e-cba5-4c2c-a652-38f71ef5785d #Lily Allen
11
+ http://www.guardian.co.uk/music/toriamos: c0b2500e-0cef-4130-869d-732b23ed9df5 #Tori Amos
12
+ http://www.guardian.co.uk/music/animalcollective: 0c751690-c784-4a4f-b1e4-c1de27d47581 #Animal Collective
13
+ http://www.guardian.co.uk/music/antonyandthejohnsons: 90cc2464-234e-4da0-b39b-576f36e633bc #Antony and the Johnsons
14
+ http://www.guardian.co.uk/music/fionaapple: a9ee533f-8871-4f62-a6bb-91eb264abc90 #Fiona Apple
15
+ http://www.guardian.co.uk/music/arcadefire: 52074ba6-e495-4ef3-9bb4-0703888a9f68 #Arcade Fire
16
+ http://www.guardian.co.uk/music/arcticmonkeys: ada7a83c-e3e1-40f1-93f9-3e73dbc9298a #Arctic Monkeys
17
+ http://www.guardian.co.uk/music/richardashcroft: 2d1d8985-47bc-4244-8cc3-577584e411f6 #Richard Ashcroft
18
+ http://www.guardian.co.uk/music/babyshambles: 8e1e03fe-ebbc-467a-b541-857144db10fb #Babyshambles
19
+ http://www.guardian.co.uk/music/jsbach: e2f2a608-9a8b-47b7-81d1-04af13632336 #JS
20
+ http://www.guardian.co.uk/music/badlydrawnboy: 0881daf1-20df-4a3e-a84f-6476a84bb172 #Badly Drawn Boy
21
+ http://www.guardian.co.uk/music/erykahbadu: 321531fc-db73-4ffa-a959-61a61a2908c1 #Erykah Badu
22
+ http://www.guardian.co.uk/music/sydbarrett: 12327d75-47d5-45d9-84c2-3760b9210c17 #Syd Barrett
23
+ http://www.guardian.co.uk/music/basementjaxx: 28cbf94d-0700-4095-a188-37e373b069a7 #Basement Jaxx
24
+ http://www.guardian.co.uk/music/batforlashes: 10000730-525f-4ed5-aaa8-92888f060f5f #Bat for Lashes
25
+ http://www.guardian.co.uk/music/battles: 8522b9b6-b295-48d7-9a10-8618fb80beb8 #Battles
26
+ http://www.guardian.co.uk/music/thebeatles: b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d #The Beatles
27
+ http://www.guardian.co.uk/music/beck: 0df890e1-f4f2-4b21-a413-cd8af1af32d8 #Christophe Beck
28
+ http://www.guardian.co.uk/music/belleandsebastian: e5c7b94f-e264-473c-bb0f-37c85d4d5c70 #Belle and Sebastian
29
+ http://www.guardian.co.uk/music/bjork: 87c5dedd-371d-4a53-9f7f-80522fb7f3cb #Björk
30
+ http://www.guardian.co.uk/music/blackflag: 9941a936-196a-4a62-ae53-a69cbc33f20e #Black Flag
31
+ http://www.guardian.co.uk/music/frankblack: 98a35a4d-a845-4282-bc7c-5679f2bb9bd2 #Frank Black
32
+ http://www.guardian.co.uk/music/blacksabbath: 5182c1d9-c7d2-4dad-afa0-ccfeada921a8 #Black Sabbath
33
+ http://www.guardian.co.uk/music/blocparty: 8c538f11-c141-4588-8ecb-931083524186 #Bloc Party
34
+ http://www.guardian.co.uk/music/blondie: 4d2956d1-a3f7-44bb-9a41-67563e1a0c94 #Blondie
35
+ http://www.guardian.co.uk/music/blur: ba853904-ae25-4ebb-89d6-c44cfbd71bd2 #Blur
36
+ http://www.guardian.co.uk/music/bonnieprincebilly: 7b0a63b2-b8e8-490e-b724-1d30cb5edfe3 #Bonnie 'Prince' Billy
37
+ http://www.guardian.co.uk/music/davidbowie: 5441c29d-3602-4898-b1a1-b77fa23b8e50 #David Bowie
38
+ http://www.guardian.co.uk/music/breeders: 667e856e-f3a7-42e9-8244-95422ef31321 #The Breeders
39
+ http://www.guardian.co.uk/music/foxybrown: c0235fec-bb90-4976-8da1-5d1b37e158ec #Foxy Brown & Jay-Z
40
+ http://www.guardian.co.uk/music/ianbrown: 182d5d8a-94c7-4ef4-8222-a1838353a37b #Ian Brown
41
+ http://www.guardian.co.uk/music/buzzcocks: 31e9c35b-2675-4632-8596-f9bd9286f6c8 #Buzzcocks
42
+ http://www.guardian.co.uk/music/camdencrawl: b1dffc4b-0b99-4037-a726-0a111e0ca812 #Camden
43
+ http://www.guardian.co.uk/music/mariahcarey: 494e8d09-f85b-4543-892f-a5096aed1cd4 #Mariah Carey
44
+ http://www.guardian.co.uk/music/johnnycash: d43d12a1-2dc9-4257-a2fd-0a3bb1081b86 #Johnny Cash
45
+ http://www.guardian.co.uk/music/charlatans: 8434409e-baa9-4e12-b4aa-566a91c7d7cf #The Charlatans
46
+ http://www.guardian.co.uk/music/raycharles: 2ce02909-598b-44ef-a456-151ba0a3bd70 #Ray Charles
47
+ http://www.guardian.co.uk/music/clapyourhandssayyeah: 4b2d6a23-034d-4a29-9bb9-d2462796da4e #Clap Your Hands Say Yeah
48
+ http://www.guardian.co.uk/music/ericclapton: 618b6900-0618-4f1e-b835-bccb17f84294 #Eric Clapton
49
+ http://www.guardian.co.uk/music/clash: 8f92558c-2baa-4758-8c38-615519e9deda #The Clash
50
+ http://www.guardian.co.uk/music/jimmycliff: 2caa54a7-b08c-41da-b892-3a41abe778be #Jimmy Cliff
51
+ http://www.guardian.co.uk/music/clinic: 1ea2c08f-323c-4207-9af1-e2fb8588f08e #Clinic
52
+ http://www.guardian.co.uk/music/georgeclinton: 7e22c4e9-1355-48d3-af58-03347e95b5f3 #George S. Clinton
53
+ http://www.guardian.co.uk/music/clubs: a3751fb8-b8cc-4b43-a1cb-3cb39a074402 #Clubbing
54
+ http://www.guardian.co.uk/music/coldplay: cc197bad-dc9c-440d-a5b5-d52ba2e14234 #Coldplay
55
+ http://www.guardian.co.uk/music/alicecooper: 4d7928cd-7ed2-4282-8c29-c0c9f966f1bd #Alice Cooper
56
+ http://www.guardian.co.uk/music/elviscostello: 8a338e06-d182-46f2-bd16-30a09bc840ba #Elvis Costello
57
+ http://www.guardian.co.uk/music/cribs: a3a92047-be1c-4f3e-8960-c4f8570984df #The Cribs
58
+ http://www.guardian.co.uk/music/crosbystillsnashandyoung: 46a782ea-4308-476b-abd1-a91b197f3037 #Crosby, Stills, Nash & Young
59
+ http://www.guardian.co.uk/music/cypresshill: 51508c1f-8d07-4a00-9cf1-26c570fe7b78 #Cypress Hill
60
+ http://www.guardian.co.uk/music/dangermouse: 4b356f05-bcc2-4544-925b-fd9a1bf708be #Danger Mouse
61
+ http://www.guardian.co.uk/music/raydavies: a0ecce6d-7dfd-4e3c-9d1e-26465244450d #Ray Davies & His Funky Trumpet
62
+ http://www.guardian.co.uk/music/deathcabforcutie: 0039c7ae-e1a7-4a7d-9b49-0cbc716821a6 #Death Cab for Cutie
63
+ http://www.guardian.co.uk/music/defleppard: 7249b899-8db8-43e7-9e6e-22f1e736024e #Def Leppard
64
+ http://www.guardian.co.uk/music/devo: 4d8a5bce-7f33-4fd2-bf8f-e8f6cf467373 #Devo Springsteen
65
+ http://www.guardian.co.uk/music/dinosaurjr: 77c167d2-4965-4421-830a-9815e4956475 #Dinosaur Jr.
66
+ http://www.guardian.co.uk/music/direstraits: 614e3804-7d34-41ba-857f-811bad7c2b7a #Dire Straits
67
+ http://www.guardian.co.uk/music/dirtyprettythings: 648615ca-ca74-460d-928a-2bae67ae6d14 #Dirty Pretty Things
68
+ http://www.guardian.co.uk/music/dizzeerascal: 1a99cc88-aea3-4fe3-96b9-20791667f65f #Dizzee Rascal
69
+ http://www.guardian.co.uk/music/petedoherty: ff041743-15eb-49db-933f-8bc66a4a3235 #Pete Doherty
70
+ http://www.guardian.co.uk/music/doors: 9efff43b-3b29-4082-824e-bc82f646f93d #The Doors
71
+ http://www.guardian.co.uk/music/drdre: 5f6ab597-f57a-40da-be9e-adad48708203 #Dr. Dre
72
+ http://www.guardian.co.uk/music/duranduran: 2f1baa8d-aad7-4bf5-b5f2-ec857e20129a #Andy Durán
73
+ http://www.guardian.co.uk/music/bobdylan: 72c536dc-7137-4477-a521-567eeb840fa8 #Bob Dylan
74
+ http://www.guardian.co.uk/music/eagles: f46bd570-5768-462e-b84c-c7c993bbf47e #Eagles
75
+ http://www.guardian.co.uk/music/echoandthebunnymen: ccd4879c-5e88-4385-b131-bf65296bf245 #Echo & The Bunnymen
76
+ http://www.guardian.co.uk/music/eels: 14387b0f-765c-4852-852f-135335790466 #Eels
77
+ http://www.guardian.co.uk/music/elbow: 3cb3928a-526c-4a3d-93c5-53315fa9bde0 #Elbow
78
+ http://www.guardian.co.uk/music/missyelliott: a0b8cb9e-7532-45fe-a74c-30e7c4009a39 #Missy Elliott
79
+ http://www.guardian.co.uk/music/elvis25yearson: 6bd50a1f-c6b6-49ab-97b5-87cc5430f16c #17 Years
80
+ http://www.guardian.co.uk/music/eminem: b95ce3ff-3d05-4e87-9e01-c97b66af13d4 #Eminem
81
+ http://www.guardian.co.uk/music/brianeno: ff95eb47-41c4-4f7f-a104-cdc30f02e872 #Brian Eno
82
+ http://www.guardian.co.uk/music/falloutboy: 516cef4d-0718-4007-9939-f9b38af3f784 #Fall Out Boy
83
+ http://www.guardian.co.uk/music/fall: d5da1841-9bc8-4813-9f89-11098090148e #The Fall
84
+ http://www.guardian.co.uk/music/bryanferry: 4ef7a9e2-2cf5-483a-8616-ef7791a98026 #Bryan Ferry
85
+ http://www.guardian.co.uk/music/foals: 6a65d878-fcd0-42cf-aff9-ca1d636a8bcc #Foals
86
+ http://www.guardian.co.uk/music/foofighters: 67f66c07-6e61-4026-ade5-7e782fad3a5d #Foo Fighters
87
+ http://www.guardian.co.uk/music/franzferdinand: aa7a2827-f74b-473c-bd79-03d065835cf7 #Franz Ferdinand
88
+ http://www.guardian.co.uk/music/futureheads: 0cf9d983-aecf-4d57-9e94-fde2beac014f #The Futureheads
89
+ http://www.guardian.co.uk/music/gallows: 4cb159df-bd09-4f42-bfec-09b6a248a52f #Gallows
90
+ http://www.guardian.co.uk/music/gangoffour: d8661c02-f423-4d72-8044-40ff05daf7a1 #Gang of Four
91
+ http://www.guardian.co.uk/music/georgeharrison: 42a8f507-8412-4611-854f-926571049fa0 #George Harrison
92
+ http://www.guardian.co.uk/music/georgemichael: ccb8f30e-4d71-40c4-8b1d-846dafe73e2c #George Michael
93
+ http://www.guardian.co.uk/music/girlsaloud: a0b2f210-cd3a-453d-937d-e4f2658d17c7 #Girls Aloud
94
+ http://www.guardian.co.uk/music/gnarlsbarkley: a47c3aa2-7d87-475c-a2c7-1e2047dafb09 #Gnarls Barkley
95
+ http://www.guardian.co.uk/music/gorillaz: e21857d5-3256-4547-afb3-4b6ded592596 #Gorillaz
96
+ http://www.guardian.co.uk/music/greenday: 084308bd-1654-436f-ba03-df6697104e19 #Green Day
97
+ http://www.guardian.co.uk/music/grinderman: 611f1230-7c2b-4610-b9e6-5a4fcb6e3cc7 #Grinderman
98
+ http://www.guardian.co.uk/music/groovearmada: 35723b60-732e-4bd8-957f-320b416e7b7f #Groove Armada
99
+ http://www.guardian.co.uk/music/gunsnroses: eeb1195b-f213-4ce1-b28c-8565211f8e43 #Guns N' Roses
100
+ http://www.guardian.co.uk/music/pjharvey: e795e03d-b5d5-4a5f-834d-162cfb308a2c #PJ Harvey
101
+ http://www.guardian.co.uk/music/isaachayes: a2361c7d-ddb6-41c1-a9a0-be09fbbb8d21 #Isaac Hayes
102
+ http://www.guardian.co.uk/music/icecube: b1274489-1832-4fd9-a7b6-9414d0b05f62 #Dr. Dre & Ice Cube
103
+ http://www.guardian.co.uk/music/indigogirls: 00c49f40-d715-4b79-b223-432048602cce #Indigo Girls
104
+ http://www.guardian.co.uk/music/inxs: 481bf5f9-2e7c-4c44-b08a-05b32bc7c00d #INXS
105
+ http://www.guardian.co.uk/music/boniver: 437a0e49-c6ae-42f6-a6c1-84f25ed366bc #Bon Iver
106
+ http://www.guardian.co.uk/music/michaeljackson: f27ec8db-af05-4f36-916e-3d57f91ecf5e #Michael Jackson
107
+ http://www.guardian.co.uk/music/jam: 23228f18-01d5-493e-94ce-cfcde82a8db2 #The Jam
108
+ http://www.guardian.co.uk/music/jayz: f82bcf78-5b69-4622-a5ef-73800768d9ac #Jay-Z
109
+ http://www.guardian.co.uk/music/jesusandmarychain: e938a15c-b17e-4e7a-9f68-ff0d536cab44 #The Jesus and Mary Chain
110
+ http://www.guardian.co.uk/music/johnfordham: fc3d0f8f-236e-4012-afc3-3002c4d2530c #Debra Fordham
111
+ http://www.guardian.co.uk/music/johnlennon: 4d5447d7-c61c-4120-ba1b-d7f471d385b9 #John Lennon
112
+ http://www.guardian.co.uk/music/joydivision: 9a58fda3-f4ed-4080-a3a5-f457aac9fcdd #Joy Division
113
+ http://www.guardian.co.uk/music/kaiserchiefs: 90218af4-4d58-4821-8d41-2ee295ebbe21 #Kaiser Chiefs
114
+ http://www.guardian.co.uk/music/kasabian: 69b39eab-6577-46a4-a9f5-817839092033 #Kasabian
115
+ http://www.guardian.co.uk/music/kelis: 1239d1bc-cc09-43e0-bcd0-374f60346138 #Kelis
116
+ http://www.guardian.co.uk/music/aliciakeys: e92aa52d-bb07-4ed7-bcca-2c3f7e93d863 #Usher & Alicia Keys
117
+ http://www.guardian.co.uk/music/rilokiley: eaf6a7ca-105d-4a94-ba02-8c3e4040319a #Rilo Kiley
118
+ http://www.guardian.co.uk/music/killers: 95e1ead9-4d31-4808-a7ac-32c3614c116b #The Killers
119
+ http://www.guardian.co.uk/music/kingsofconvenience: cf0f4547-ffbd-4011-98ad-0bec9ba022db #Kings of Convenience
120
+ http://www.guardian.co.uk/music/kingsofleon: 6ffb8ea9-2370-44d8-b678-e9237bbd347b #Kings of Leon
121
+ http://www.guardian.co.uk/music/kinks: 17b53d9f-5c63-4a09-a593-dde4608e0db9 #The Kinks
122
+ http://www.guardian.co.uk/music/kiss: 98b67ebc-5606-4cdb-9787-47b12cceb101 #Kiss
123
+ http://www.guardian.co.uk/music/alisonkrauss: 6b064ead-91a4-4ac8-8076-b1febe4f4aac #Alison Krauss
124
+ http://www.guardian.co.uk/music/ledzeppelin: 678d88b2-87b0-403b-b63d-5da7465aecc3 #Led Zeppelin
125
+ http://www.guardian.co.uk/music/johnlegend: 75a72702-a5ef-4513-bca5-c5b944903546 #John Legend
126
+ http://www.guardian.co.uk/music/leonalewis: 8d552dfc-648f-401f-90de-e925013ca537 #Leona Lewis
127
+ http://www.guardian.co.uk/music/libertines: 82b304c0-7da4-45d3-896a-0767c7ae1141 #The Libertines
128
+ http://www.guardian.co.uk/music/llcoolj: a4dd0e77-83b8-4e92-89b7-effb0e47fd8c #LL Cool J
129
+ http://www.guardian.co.uk/music/love: 34ec9a8d-c65b-48fd-bcdd-aad2f72fdb47 #Love
130
+ http://www.guardian.co.uk/music/courtney: 31d2041c-985d-48f7-b6e2-2a70cdf14853 #Courtney Love
131
+ http://www.guardian.co.uk/music/nicklowe: a3d5a6bf-c86d-44d3-969b-f345620009c6 #Nick Lowe
132
+ http://www.guardian.co.uk/music/luna: 107ed89a-88ff-4a90-8b75-2619dc7ba950 #Luna-C
133
+ http://www.guardian.co.uk/music/madonna: 79239441-bfd5-4981-a70c-55c3f15c1287 #Madonna
134
+ http://www.guardian.co.uk/music/bobmarley: c33627c6-ef0d-49de-9ef0-c4804190040f #Bob Marley
135
+ http://www.guardian.co.uk/music/johnnymarr: d6ee4ecc-ed0a-42f3-a3c8-d943bd63744f #Johnny Marr + The Healers
136
+ http://www.guardian.co.uk/music/maximopark: 92e634a7-6023-4be8-be15-ebba822f5b34 #Maxïmo Park
137
+ http://www.guardian.co.uk/music/paulmccartney: cd7a47b2-bdcb-413f-a939-7d8d55bd6fc3 #Paul McCartney & Linda McCartney
138
+ http://www.guardian.co.uk/music/metallica: 65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab #Metallica
139
+ http://www.guardian.co.uk/music/mia: 5f6be871-eb98-42f1-bce4-5a3d8212c281 #M.I.A.
140
+ http://www.guardian.co.uk/music/michaeljacksontrial: f27ec8db-af05-4f36-916e-3d57f91ecf5e #Michael Jackson
141
+ http://www.guardian.co.uk/music/kylieminogue: 2fddb92d-24b2-46a5-bf28-3aed46f4684c #Kylie Minogue
142
+ http://www.guardian.co.uk/music/jonimitchell: a6de8ef9-b1a1-4756-97aa-481bbb8a4069 #Joni Mitchell
143
+ http://www.guardian.co.uk/music/modestmouse: a96ac800-bfcb-412a-8a63-0a98df600700 #Modest Mouse
144
+ http://www.guardian.co.uk/music/vanmorrison: a41ac10f-0a56-4672-9161-b83f9b223559 #Van Morrison
145
+ http://www.guardian.co.uk/music/morrissey: 013fa897-86db-41d3-8e9f-386c8a34f4e6 #Morrissey
146
+ http://www.guardian.co.uk/music/mozart: b972f589-fb0e-474e-b64a-803b0364fa75 #Wolfgang Amadeus Mozart
147
+ http://www.guardian.co.uk/music/mudhoney: e675295a-1efe-4247-aa3b-53b78d0cdffc #Mudhoney
148
+ http://www.guardian.co.uk/music/muse: 9c9f1380-2516-4fc9-a3e6-f9f61941d090 #Muse
149
+ http://www.guardian.co.uk/music/netmusic: a10ffaf2-0d3f-4d89-bc42-914c36e86f02 #Internet
150
+ http://www.guardian.co.uk/music/roxymusic: 331ce348-1b08-40b9-8ed7-0763b92bd003 #Roxy Music
151
+ http://www.guardian.co.uk/music/mychemicalromance: c07f0676-9143-4217-8a9f-4c26bd636f13 #My Chemical Romance
152
+ http://www.guardian.co.uk/music/neptunes: cc85e0b6-e953-4602-be9c-8c4218e417de #Neptunes
153
+ http://www.guardian.co.uk/music/nerd: 3fb49f5a-fdc0-4789-9c84-22b38b3f3cb5 #N.E.R.D.
154
+ http://www.guardian.co.uk/music/neworder: af1995e4-16a1-4c05-9ac9-082edeb3a099 #Kylie Minogue vs. New Order
155
+ http://www.guardian.co.uk/music/stevienicks: 4b1bf05d-0e3c-44a3-9fc8-ad088fc25797 #Buckingham Nicks
156
+ http://www.guardian.co.uk/music/nineinchnails: b7ffd2af-418f-4be2-bdd1-22f8b48613da #Nine Inch Nails
157
+ http://www.guardian.co.uk/music/nirvana: 5b11f4ce-a62d-471e-81fc-a69a8278c7da #Nirvana
158
+ http://www.guardian.co.uk/music/nodoubt: fbd2a255-1d57-4d31-ac11-65b671c19958 #No Doubt
159
+ http://www.guardian.co.uk/music/garynuman: 6cb79cb2-9087-44d4-828b-5c6fdff2c957 #Gary Numan
160
+ http://www.guardian.co.uk/music/nwa: 3a54bffa-2314-44a2-927b-60144119c780 #N.W.A
161
+ http://www.guardian.co.uk/music/oasis: 39ab1aed-75e0-4140-bd47-540276886b60 #Oasis
162
+ http://www.guardian.co.uk/music/oldirtybastard: d50548a0-3cfd-4d7a-964b-0aef6545d819 #Ol' Dirty Bastard
163
+ http://www.guardian.co.uk/music/orbital: f3e2a7d9-c6bb-4848-95e5-04c0a1e2f511 #Orbital
164
+ http://www.guardian.co.uk/music/ozzyosbourne: 8aa5b65a-5b3c-4029-92bf-47a544356934 #Ozzy Osbourne
165
+ http://www.guardian.co.uk/music/outkast: 73fdb566-a9b1-494c-9f32-51768ec9fd27 #OutKast
166
+ http://www.guardian.co.uk/music/panicatthedisco: b9472588-93f3-4922-a1a2-74082cdf9ce8 #Panic at the Disco
167
+ http://www.guardian.co.uk/music/gramparsons: cbc83fb1-2c23-4ad1-9187-594b30da3f21 #Gram Parsons
168
+ http://www.guardian.co.uk/music/peaches: 270acfee-1fbe-413e-a0a8-6a35a8b3b66c #Peaches
169
+ http://www.guardian.co.uk/music/pearljam: 83b9cbe7-9857-49e2-ab8e-b57b01038103 #Pearl Jam
170
+ http://www.guardian.co.uk/music/johnpeel: 65ddc5dd-6e1c-4f70-bee3-b67703bbf4c8 #John Peel
171
+ http://www.guardian.co.uk/music/petshopboys: be540c02-7898-4b79-9acc-c8122c7d9e83 #Pet Shop Boys
172
+ http://www.guardian.co.uk/music/tompetty: f93dbc64-6f08-4033-bcc7-8a0bb4689849 #Tom Petty and The Heartbreakers
173
+ http://www.guardian.co.uk/music/pinkfloyd: 83d91898-7763-47d7-b03b-b92132375c47 #Pink Floyd
174
+ http://www.guardian.co.uk/music/pixies: b6b2bb8d-54a9-491f-9607-7b546023b433 #Pixies
175
+ http://www.guardian.co.uk/music/iggypop: f37b3f31-b1f8-4b88-8cb5-b34f709b17d7 #Iggy Pop
176
+ http://www.guardian.co.uk/music/portishead: 8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11 #Portishead
177
+ http://www.guardian.co.uk/music/elvispresley: 01809552-4f87-45b0-afff-2c6f0730a3be #Elvis Presley
178
+ http://www.guardian.co.uk/music/primalscream: 55704c38-224f-4b75-b29f-d43653f8bc9a #Primal Scream
179
+ http://www.guardian.co.uk/music/prince: 070d193a-845c-479f-980e-bef15710653e #Prince
180
+ http://www.guardian.co.uk/music/prodigy: 4a4ee089-93b1-4470-af9a-6ff575d32704 #The Prodigy
181
+ http://www.guardian.co.uk/music/pulp: 76b2e842-5e85-4c97-ab62-d5bc315595b5 #Pulp
182
+ http://www.guardian.co.uk/music/queensofthestoneage: 7dc8f5bd-9d0b-4087-9f73-dc164950bbd8 #Queens of the Stone Age
183
+ http://www.guardian.co.uk/music/raconteurs: be407b02-f3e6-4ed5-9489-f8e5f0ab36dc #The Raconteurs
184
+ http://www.guardian.co.uk/music/radiohead: a74b1b7f-71a5-4011-9441-d0b5e4122711 #Radiohead
185
+ http://www.guardian.co.uk/music/ramones: d6ed7887-a401-47a8-893c-34b967444d26 #Ramones
186
+ http://www.guardian.co.uk/music/razorlight: f2cb0435-d643-4fab-9587-fdb0279330a7 #Razorlight
187
+ http://www.guardian.co.uk/music/redhotchilipeppers: 8bfac288-ccc5-448d-9573-c33ea2aa5c30 #Red Hot Chili Peppers
188
+ http://www.guardian.co.uk/music/keithrichards: f0ed72a3-ae8f-4cf7-b51d-2696a2330230 #Keith Richards
189
+ http://www.guardian.co.uk/music/santogold: d7311646-287b-4d3a-9a4f-7d46f93075e5 #Santogold
190
+ http://www.guardian.co.uk/music/sexpistols: e5db18cb-4b1f-496d-a308-548b611090d3 #Sex Pistols
191
+ http://www.guardian.co.uk/music/simonandgarfunkel: 5d02f264-e225-41ff-83f7-d9b1f0b1874a #Simon & Garfunkel
192
+ http://www.guardian.co.uk/music/siouxsieandthebanshees: 78ea5ea1-3c4d-4b7e-ac5d-68900319ebe2 #Siouxsie and The Banshees
193
+ http://www.guardian.co.uk/music/smashingpumpkins: ba0d6274-db14-4ef5-b28d-657ebde1a396 #The Smashing Pumpkins
194
+ http://www.guardian.co.uk/music/elliottsmith: 03ad1736-b7c9-412a-b442-82536d63a5c4 #Elliott Smith
195
+ http://www.guardian.co.uk/music/smiths: 40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27 #The Smiths
196
+ http://www.guardian.co.uk/music/snoopdogg: f90e8b26-9e52-4669-a5c9-e28529c47894 #Snoop Dogg
197
+ http://www.guardian.co.uk/music/samsparro: cd71e6e9-42bb-4a1a-b5ce-17f41682b3e2 #Sam Sparro
198
+ http://www.guardian.co.uk/music/britneyspears: 45a663b5-b1cb-4a91-bff6-2bef7bbfdd76 #Britney Spears
199
+ http://www.guardian.co.uk/music/spicegirls: bf0caafc-2b20-4e07-ab85-87e14ff430ce #Spice Girls
200
+ http://www.guardian.co.uk/music/spiritualized: 65041e06-83d2-4987-ae52-c17a915fc82a #Spiritualized
201
+ http://www.guardian.co.uk/music/springsteen: 70248960-cb53-4ea4-943a-edb18f7d336f #Bruce Springsteen
202
+ http://www.guardian.co.uk/music/gwenstefani: 2e41ae9c-afd2-4f20-8f1e-17281ce9b472 #Gwen Stefani
203
+ http://www.guardian.co.uk/music/sufjanstevens: 01d3c51b-9b98-418a-8d8e-37f6fab59d8c #Sufjan Stevens
204
+ http://www.guardian.co.uk/music/davestewart: 4f8df6e2-33dc-4d05-86d9-2f9641c6f4d7 #Dave Stewart & Barbara Gaskin
205
+ http://www.guardian.co.uk/music/rodstewart: a35237a0-4f47-40a6-b6f3-1e786db23402 #Rod Stewart
206
+ http://www.guardian.co.uk/music/stoneroses: b5fa29f1-6c22-4321-a488-b5f363b06b06 #The Stone Roses
207
+ http://www.guardian.co.uk/music/strokes: 16aacd08-a0f3-46c1-b7ec-f1736f5de60d #The Diff'rent Strokes
208
+ http://www.guardian.co.uk/music/joestrummer: 39c1e474-647e-42ef-a157-fcfb30c2c2ff #Joe Strummer & The Mescaleros
209
+ http://www.guardian.co.uk/music/supergrass: 6386ddff-0d13-4685-9f0a-a82bf022fb1c #Supergrass
210
+ http://www.guardian.co.uk/music/hives: 487bfd74-71bf-46dd-b89c-80b7a0f06f2f #The Hives
211
+ http://www.guardian.co.uk/music/therollingstones: b071f9fa-14b0-4217-8e97-eb41da73f598 #The Rolling Stones
212
+ http://www.guardian.co.uk/music/thespecials: 07eb40a2-2914-439c-a01d-15a685b84ddf #The Specials
213
+ http://www.guardian.co.uk/music/thestreets: 0345b1d2-9017-4a97-848e-d5f7d2ea8de6 #One Way Streets
214
+ http://www.guardian.co.uk/music/who: 9fdaa16b-a6c4-4831-b87c-bc9ca8ce7eaa #The Who
215
+ http://www.guardian.co.uk/music/timbaland: daa09819-5da5-4c7a-8bef-eb372bb27ff1 #Timbaland
216
+ http://www.guardian.co.uk/music/justintimberlake: 596ffa74-3d08-44ef-b113-765d43d12738 #Justin Timberlake
217
+ http://www.guardian.co.uk/music/petetownshend: fb147b8f-0144-4418-acaa-90b2d9779840 #Pete Townshend
218
+ http://www.guardian.co.uk/music/kttunstall: 951d2103-9c7d-4849-ae60-88bf6aa4790b #KT Tunstall
219
+ http://www.guardian.co.uk/music/u2: a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432 #U2
220
+ http://www.guardian.co.uk/music/ub40: 7113aab7-628f-4050-ae49-dbecac110ca8 #UB40
221
+ http://www.guardian.co.uk/music/urban: ca738bcc-b2ce-4dcc-af52-d1654bfd4733 #Urban Cowboyz
222
+ http://www.guardian.co.uk/music/velvetrevolver: 3a528006-1429-47f4-ae9b-2ea95343e16a #Velvet Revolver
223
+ http://www.guardian.co.uk/music/velvetunderground: 94b0fb9d-a066-4823-b2ec-af1d324bcfcf #The Velvet Underground
224
+ http://www.guardian.co.uk/music/verve: d4d17620-fd97-4574-92a8-a2cb7e72ce42 #The Verve
225
+ http://www.guardian.co.uk/music/vines: 4e045c96-538b-46ed-8ea8-7cae20b56574 #The Vines
226
+ http://www.guardian.co.uk/music/marthawainwright: 231475d8-fddb-4d7d-aad9-287e59e4b4ba #Martha Wainwright
227
+ http://www.guardian.co.uk/music/rufuswainwright: 78e46ae5-9bfd-433b-be3f-19e993d67ecc #Rufus Wainwright
228
+ http://www.guardian.co.uk/music/mward: 655b3e5b-09e4-45dd-941c-6fa3fc12521b #M. Ward
229
+ http://www.guardian.co.uk/music/werejammin: 7fa1acb8-4fb9-4947-9040-51a844969834 #Jammin'
230
+ http://www.guardian.co.uk/music/weezer: 6fe07aa5-fec0-4eca-a456-f29bff451b04 #Weezer
231
+ http://www.guardian.co.uk/music/paulweller: ac1749b5-088e-4c42-9c39-7f578ff54f6e #Paul Weller vs. Portishead
232
+ http://www.guardian.co.uk/music/kanyewest: 164f0d73-1234-4e2c-8743-d77bf2191051 #Kanye West
233
+ http://www.guardian.co.uk/music/thewhitestripes: 11ae9fbb-f3d7-4a47-936f-4c0a04d3b3b5 #The White Stripes
234
+ http://www.guardian.co.uk/music/wilco: 9e53f84d-ef44-4c16-9677-5fd4d78cbd7d #Wilco
235
+ http://www.guardian.co.uk/music/pharrellwilliams: 149f91ef-1287-46da-9a8e-87fee02f1471 #Pharrell Williams
236
+ http://www.guardian.co.uk/music/brianwilson: 9b07fae3-4442-4c40-a9e0-78d3e0540901 #Brian Wilson & Van Dyke Parks
237
+ http://www.guardian.co.uk/music/tonywilson: c90528f0-75e7-435f-82e8-dfbdcf8824d3 #Tony Wilson
238
+ http://www.guardian.co.uk/music/amywinehouse: dfe9a7c4-8cf2-47f4-9dcb-d233c2b86ec3 #Amy Winehouse
239
+ http://www.guardian.co.uk/music/patrickwolf: 4ac4e32b-bd18-402e-adad-ae00e72f8d85 #Patrick Wolf
240
+ http://www.guardian.co.uk/music/steviewonder: 1ee18fb3-18a6-4c7f-8ba0-bc41cdd0462e #Stevie Wonder
241
+ http://www.guardian.co.uk/music/wutangclan: 0febdcf7-4e1f-4661-9493-b40427de2c13 #Wu-Tang Clan
242
+ http://www.guardian.co.uk/music/xtc: 97c86b2c-2765-46a2-aef8-76a7e24c430f #XTC
243
+ http://www.guardian.co.uk/music/yeahyeahyeahs: 584c04d2-4acc-491b-8a0a-e63133f4bfc4 #Yeah Yeah Yeahs
244
+ http://www.guardian.co.uk/music/neilyoung: 0f3515b0-75c9-46c9-b26c-4cd05d26eae7 #Neil Young & Crazy Horse
245
+ http://www.guardian.co.uk/music/thezutons: 6290b769-173d-49d1-990e-660a4e333877 #The Zutons
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'open-uri'
4
+ require 'yaml'
5
+
6
+ $: << File.expand_path(File.dirname(__FILE__))
7
+
8
# Associates a document with identifiers by looking up the hyperlinks it
# contains in a URL => identifier mapping.
class LinkToad
  # A trailing "/index.<ext>" path segment with a 3-4 character extension.
  INDEX_PAGE = %r{/index\.\w{3,4}\z}i

  attr_reader :mapping

  # +mapping+ is a hash mapping from a URL to an identifier (or an array of
  # identifiers) that should be associated with that URL.
  def initialize(mapping)
    @mapping = mapping
  end

  # Returns the unique identifiers for the document at the given +url+.
  #
  # Identifiers are found by looking up links in the document in the
  # +mapping+ hash.
  def match(url)
    links_from_url(url).map { |link| hits_for_uri(link) }.flatten.uniq
  end

  protected

  # Fetches the document at +url+ and returns the unique absolute HTTP(S)
  # link targets of its <a> elements, as strings.
  #
  # Anchors without an href, hrefs that do not parse as URIs, and anything
  # that is not an absolute http/https URI (relative links, mailto:, ...) are
  # skipped.
  def links_from_url(url)
    doc = Hpricot(open(url))
    links = []
    doc.search('//a').each do |link|
      href = link.attributes['href']
      next if href.nil?
      begin
        uri = URI.parse(href.strip)
      rescue URI::InvalidURIError
        next
      end
      # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
      next unless uri.kind_of?(URI::HTTP)
      links << uri.to_s
    end
    links.uniq
  end

  # Returns the identifiers mapped to +uri+ (a String), trying these
  # spellings of it in order:
  #
  # * with and without a trailing slash,
  # * with and without a leading "www." on the host,
  # * the same again with a trailing "/index.*" page removed.
  #
  # Returns an empty array when no spelling is present in the mapping.
  def hits_for_uri(uri)
    bare = uri.sub(%r{/\z}, '')
    bases = [bare]
    # The pattern is anchored to the end and to a whole path segment, so it
    # cannot remove text from the host or from names such as "myindex.html".
    bases << bare.sub(INDEX_PAGE, '') if bare =~ INDEX_PAGE

    candidates = bases.map { |base| slash_and_www_variants(base) }.flatten
    candidates.map { |candidate| @mapping[candidate] }.flatten.compact.uniq
  end

  # Returns +base+ (which has no trailing slash) and its www-toggled twin,
  # each without and with a trailing slash.
  def slash_and_www_variants(base)
    twin = toggle_www(base)
    [base, "#{base}/", twin, "#{twin}/"]
  end

  # Adds a leading "www." to the host of an http/https URL string, or removes
  # it when already present. Only the start of the string is rewritten, so
  # URLs embedded in a path or query are left alone. Strings that do not
  # start with http:// or https:// are returned unchanged.
  def toggle_www(uri_string)
    if uri_string =~ %r{\Ahttps?://www\.}
      uri_string.sub(%r{\A(https?://)www\.}, '\1')
    else
      uri_string.sub(%r{\A(https?://)}, '\1www.')
    end
  end
end
@@ -0,0 +1,10 @@
1
# A LinkToad that also recognises links to MusicBrainz and BBC Music artist
# pages, whose URLs already embed the artist identifier.
class MusicArtistsLinkToad < LinkToad
  # Artist page URLs; the first capture group is the 36-character identifier.
  # Dots in the host names are escaped so they match literal dots only, and
  # both http and https links are accepted.
  ARTIST_PAGE_PATTERNS = [
    %r{https?://(?:www\.)?musicbrainz\.org/artist/([-a-f0-9]{36})},
    %r{https?://(?:www\.)?bbc\.co\.uk/music/artists/([-a-f0-9]{36})}
  ].freeze

  protected

  # Returns the identifier embedded in +uri+ when it is an artist page link,
  # as a one-element array; otherwise falls back to the mapping lookup of the
  # superclass.
  def hits_for_uri(uri)
    ARTIST_PAGE_PATTERNS.each do |pattern|
      found = pattern.match(uri)
      return [found[1]] if found
    end
    super
  end
end
@@ -0,0 +1,81 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Specs for LinkToad: link extraction from a fetched document and the
# URL-variant lookup performed by hits_for_uri.
describe LinkToad do
  before(:each) do
    @toad = LinkToad.new({})
  end

  describe "extracting links" do
    describe "from an empty doc" do
      before(:each) do
        # Stub the fetch so no network access happens.
        @toad.expects(:open).once.returns('')
      end

      it "should return an empty array of links" do
        @toad.send(:links_from_url, 'http://www.foo.com').should == []
      end
    end

    describe "from a page with one link" do
      before(:each) do
        @toad.expects(:open).once.returns('<a href="http://www.foo.com">foo</a>')
      end

      it "should return that link back" do
        @toad.send(:links_from_url, 'http://www.foo.com').should == [ 'http://www.foo.com' ]
      end
    end
  end

  describe "looking up gids for a uri" do
    before(:each) do
      @toad.mapping.merge!({
        'http://www.coldplay.com/' => 'cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.keanemusic.com/' => 'c7020c6d-cae9-4db3-92a7-e5c561cbad50',
        'http://www.gymclassheroes.com/' => 'f4d4b515-0b74-423f-a161-db184330c37c',
        'http://www.madonna.com/' => '79239441-bfd5-4981-a70c-55c3f15c1287',
        'http://www.oasisinet.com/' => '39ab1aed-75e0-4140-bd47-540276886b60',
        'http://adele.tv/' => 'cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493',
      })
    end

    it "should return Coldplay's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com/').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    it "should return Coldplay's GID without the trailing slash" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    # This example used to be defined twice with an identical body;
    # the redundant copy has been dropped.
    it "should return Coldplay's GID with an index.php URL" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com/index.php').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    # The second assertion checks that the index page match ignores case.
    it "should return Oasis's GID with an index.aspx URL" do
      @toad.send(:hits_for_uri, 'http://www.oasisinet.com/index.aspx').should == ['39ab1aed-75e0-4140-bd47-540276886b60']
      @toad.send(:hits_for_uri, 'http://www.oasisinet.com/Index.aspx').should == ['39ab1aed-75e0-4140-bd47-540276886b60']
    end

    it "should return Madonna's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://www.madonna.com/').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
    end

    it "should return Madonna's GID with URL omitting www." do
      @toad.send(:hits_for_uri, 'http://madonna.com').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
      @toad.send(:hits_for_uri, 'http://madonna.com/').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
    end

    it "should return Adele's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://adele.tv/').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
    end

    # The mapping holds the www-less URL here, so the looked-up URL is the
    # one carrying the extra "www." (the description previously said
    # "omitting").
    it "should return Adele's GID with URL adding www." do
      @toad.send(:hits_for_uri, 'http://www.adele.tv').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
      @toad.send(:hits_for_uri, 'http://www.adele.tv/').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
+ require 'link_toad/music_link_toad'
4
+
5
# Specs for MusicArtistsLinkToad: artist URLs that embed the identifier must
# resolve to it even though the mapping is empty.
describe MusicArtistsLinkToad do
  before(:each) { @toad = MusicArtistsLinkToad.new({}) }

  describe "looking up gids for a uri" do

    it "should match a BBC Artist URL" do
      expected_hits = [ 'cc197bad-dc9c-440d-a5b5-d52ba2e14234' ]
      bbc_uris = [
        'http://bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
        'http://www.bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
      ]
      # With and without "www.", with and without a trailing slash.
      bbc_uris.each do |artist_uri|
        @toad.send(:hits_for_uri, artist_uri).should == expected_hits
      end
    end

    it "should match a MusicBrainz URL" do
      expected_hits = [ 'cc197bad-dc9c-440d-a5b5-d52ba2e14234' ]
      musicbrainz_uris = [
        'http://musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
        'http://www.musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
      ]
      # With and without "www.", with and without a trailing slash.
      musicbrainz_uris.each do |artist_uri|
        @toad.send(:hits_for_uri, artist_uri).should == expected_hits
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'link_toad')
2
+ require 'mocha'
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metade-link_toad
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Patrick Sinclair
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-11 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">"
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.0
23
+ version:
24
+ description: LinkToad is a general purpose equivalency engine that uses hyperlinks.
25
+ email: metade@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - README
32
+ files:
33
+ - README
34
+ - lib/link_toad.rb
35
+ - lib/link_toad/music_link_toad.rb
36
+ - examples/musicbrainz/guardian/README
37
+ - examples/musicbrainz/guardian/guardian-music-news.rb
38
+ - examples/musicbrainz/guardian/guardian-musicbrainz.rb
39
+ - examples/musicbrainz/guardian/guardian-musicbrainz.yml
40
+ has_rdoc: true
41
+ homepage: http://github.com/metade/link_toad
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --main
45
+ - README
46
+ - -x
47
+ - example
48
+ - -x
49
+ - spec
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.2.0
68
+ signing_key:
69
+ specification_version: 2
70
+ summary: "LinkToad: hyperlink-powered equivalency engine."
71
+ test_files:
72
+ - spec/spec_helper.rb
73
+ - spec/link_toad_spec.rb
74
+ - spec/music_link_toad_spec.rb