metade-link_toad 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,48 @@
1
+ = link_toad
2
+
3
+ * http://github.com/metade/link_toad
4
+
5
+ == DESCRIPTION:
6
+
7
+ LinkToad is a general purpose equivalency engine that uses hyperlinks.
8
+
9
+ == USAGE:
10
+
11
+ require 'rubygems'
12
+ require 'link_toad'
13
+
14
+ # mapping of artist-related URLs to MusicBrainz artist identifier
15
+ mapping = {
16
+ 'http://www.arcticmonkeys.com/' => 'ada7a83c-e3e1-40f1-93f9-3e73dbc9298a',
17
+ }
18
+ toad = LinkToad.new(mapping)
19
+
20
+ # match a news story linking to the Arctic Monkeys homepage
21
+ toad.match('http://news.bbc.co.uk/1/hi/entertainment/7664082.stm')
22
+ => ["ada7a83c-e3e1-40f1-93f9-3e73dbc9298a"]
23
+
24
+ See +examples/+ for more details.
25
+
26
+ == LICENSE:
27
+
28
+ The MIT License
29
+
30
+ Copyright (c) 2008 Patrick Sinclair
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining a copy
33
+ of this software and associated documentation files (the "Software"), to deal
34
+ in the Software without restriction, including without limitation the rights
35
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
36
+ copies of the Software, and to permit persons to whom the Software is
37
+ furnished to do so, subject to the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be included in
40
+ all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
43
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
44
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
45
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
46
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
47
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
48
+ THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ These scripts enable news items on the Guardian RSS feed to be associated with a particular MusicBrainz artist.
2
+
3
+ guardian-musicbrainz.rb
4
+ -----------------------
5
+
6
+ * pulls in the full list of music-related tags from the Guardian website
7
+ * for each sensible-looking tag it searches MusicBrainz for a matching artist
8
+ * it outputs a YAML file mapping Guardian tag URLs to artist GUIDs
9
+
10
+ guardian-music-news.rb
11
+ ----------------------
12
+
13
+ Uses the mapping produced by +guardian-musicbrainz.rb+ to associate stories from the Guardian Music RSS feed with MusicBrainz artists.
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'rss/2.0'
3
+ require 'yaml'
4
+ require '../../../lib/link_toad.rb'
5
+
6
# Build the matcher from the URL => artist-GUID table stored in
# guardian-musicbrainz.yml (read relative to the current directory).
toad = LinkToad.new(YAML.load_file('guardian-musicbrainz.yml'))

# For every item in the Guardian music feed, print the story URL followed by
# the identifiers LinkToad finds among the links on that story's page.
RSS::Parser.parse(open('http://www.guardian.co.uk/music/rss')).items.each do |story|
  story_url = story.guid.content
  puts "#{story_url}: #{toad.match(story_url).inspect}"
end
15
+
@@ -0,0 +1,40 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'rbrainz'
4
+ require 'open-uri'
5
+ require 'yaml'
6
+
7
# Guardian keyword pages that are not artists and must never be looked up.
ignore_list = [
  # guardian specific
  /alternativetop100albums/, /bobmarley60thanniversary/,
  # events
  /britawards/, /eurovision/, /glastonbury/, /live8/, /meltdownfestival/,
  /mercuryprize/, /o2wirelessweekenders/, /proms/, /readingandleedsfestival/,
  # genres
  /classicalmusicandopera/, /downloads/, /electronicmusic/, /folk/,
  /jazz/, /popandrock/, /worldmusic/
]

# The block form of File.open closes (and therefore flushes) the YAML file
# when the block exits, including when a request below raises part-way through.
File.open('guardian-musicbrainz.yml', 'w') do |file|
  file.puts('---')

  urls = [] # keyword URLs already handled, so each is looked up only once
  q = MusicBrainz::Webservice::Query.new
  doc = Hpricot(open('http://www.guardian.co.uk/music/list/allmusickeywords'))

  doc.search('//a').each do |link|
    url = link.attributes['href']
    next if url.nil?
    next if urls.include? url
    # Only single-segment keyword pages directly under /music/ are candidates.
    # The dots are escaped so that only the literal Guardian host matches.
    next unless url =~ %r[http://www\.guardian\.co\.uk/music/(\w+)$]
    next if ignore_list.any? { |re| url =~ re }
    urls << url

    # Search MusicBrainz using the link text as the artist name.
    name = link.inner_html.strip
    results = q.get_artists(MusicBrainz::Webservice::ArtistFilter.new(:query => name))
    sleep 1 # don't hit MusicBrainz too hard

    if results.size > 0
      # Take the first result; its name is written as a trailing YAML comment
      # so a human can spot wrong matches when reviewing the file.
      m_name, gid = results[0].entity.name, results[0].entity.id.uuid
      file.puts("#{url}: #{gid} ##{m_name}")
    end
  end
end
@@ -0,0 +1,245 @@
1
+ ---
2
+ http://www.guardian.co.uk/music/2pac: 382f1005-e9ab-4684-afd4-0bdae4ee37f2 #2Pac
3
+ http://www.guardian.co.uk/music/50cent: 6e3db4ff-39cc-4675-8310-5ace0cd63bfa #50 Cent & Eminem
4
+ http://www.guardian.co.uk/music/aaliyah: bc85da58-52d9-457d-ae8d-5d8d4ec870a9 #Aaliyah
5
+ http://www.guardian.co.uk/music/abba: d87e52c5-bb8d-4da8-b941-9f4928627dc8 #ABBA
6
+ http://www.guardian.co.uk/music/ryanadams: c80f38a6-9980-485d-997c-5c1a9cbd0d64 #Ryan Adams
7
+ http://www.guardian.co.uk/music/christinaaguilera: b202beb7-99bd-47e7-8b72-195c8d72ebdd #Christina Aguilera
8
+ http://www.guardian.co.uk/music/akon: 1138a764-2212-4d0a-b02d-0dc14df91e08 #Akon
9
+ http://www.guardian.co.uk/music/alltomorrowsparties: 1771f952-52c9-4de4-8c65-8ca95f932652 #All Tomorrow's Party
10
+ http://www.guardian.co.uk/music/lilyallen: 6e0c7c0e-cba5-4c2c-a652-38f71ef5785d #Lily Allen
11
+ http://www.guardian.co.uk/music/toriamos: c0b2500e-0cef-4130-869d-732b23ed9df5 #Tori Amos
12
+ http://www.guardian.co.uk/music/animalcollective: 0c751690-c784-4a4f-b1e4-c1de27d47581 #Animal Collective
13
+ http://www.guardian.co.uk/music/antonyandthejohnsons: 90cc2464-234e-4da0-b39b-576f36e633bc #Antony and the Johnsons
14
+ http://www.guardian.co.uk/music/fionaapple: a9ee533f-8871-4f62-a6bb-91eb264abc90 #Fiona Apple
15
+ http://www.guardian.co.uk/music/arcadefire: 52074ba6-e495-4ef3-9bb4-0703888a9f68 #Arcade Fire
16
+ http://www.guardian.co.uk/music/arcticmonkeys: ada7a83c-e3e1-40f1-93f9-3e73dbc9298a #Arctic Monkeys
17
+ http://www.guardian.co.uk/music/richardashcroft: 2d1d8985-47bc-4244-8cc3-577584e411f6 #Richard Ashcroft
18
+ http://www.guardian.co.uk/music/babyshambles: 8e1e03fe-ebbc-467a-b541-857144db10fb #Babyshambles
19
+ http://www.guardian.co.uk/music/jsbach: e2f2a608-9a8b-47b7-81d1-04af13632336 #JS
20
+ http://www.guardian.co.uk/music/badlydrawnboy: 0881daf1-20df-4a3e-a84f-6476a84bb172 #Badly Drawn Boy
21
+ http://www.guardian.co.uk/music/erykahbadu: 321531fc-db73-4ffa-a959-61a61a2908c1 #Erykah Badu
22
+ http://www.guardian.co.uk/music/sydbarrett: 12327d75-47d5-45d9-84c2-3760b9210c17 #Syd Barrett
23
+ http://www.guardian.co.uk/music/basementjaxx: 28cbf94d-0700-4095-a188-37e373b069a7 #Basement Jaxx
24
+ http://www.guardian.co.uk/music/batforlashes: 10000730-525f-4ed5-aaa8-92888f060f5f #Bat for Lashes
25
+ http://www.guardian.co.uk/music/battles: 8522b9b6-b295-48d7-9a10-8618fb80beb8 #Battles
26
+ http://www.guardian.co.uk/music/thebeatles: b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d #The Beatles
27
+ http://www.guardian.co.uk/music/beck: 0df890e1-f4f2-4b21-a413-cd8af1af32d8 #Christophe Beck
28
+ http://www.guardian.co.uk/music/belleandsebastian: e5c7b94f-e264-473c-bb0f-37c85d4d5c70 #Belle and Sebastian
29
+ http://www.guardian.co.uk/music/bjork: 87c5dedd-371d-4a53-9f7f-80522fb7f3cb #Björk
30
+ http://www.guardian.co.uk/music/blackflag: 9941a936-196a-4a62-ae53-a69cbc33f20e #Black Flag
31
+ http://www.guardian.co.uk/music/frankblack: 98a35a4d-a845-4282-bc7c-5679f2bb9bd2 #Frank Black
32
+ http://www.guardian.co.uk/music/blacksabbath: 5182c1d9-c7d2-4dad-afa0-ccfeada921a8 #Black Sabbath
33
+ http://www.guardian.co.uk/music/blocparty: 8c538f11-c141-4588-8ecb-931083524186 #Bloc Party
34
+ http://www.guardian.co.uk/music/blondie: 4d2956d1-a3f7-44bb-9a41-67563e1a0c94 #Blondie
35
+ http://www.guardian.co.uk/music/blur: ba853904-ae25-4ebb-89d6-c44cfbd71bd2 #Blur
36
+ http://www.guardian.co.uk/music/bonnieprincebilly: 7b0a63b2-b8e8-490e-b724-1d30cb5edfe3 #Bonnie 'Prince' Billy
37
+ http://www.guardian.co.uk/music/davidbowie: 5441c29d-3602-4898-b1a1-b77fa23b8e50 #David Bowie
38
+ http://www.guardian.co.uk/music/breeders: 667e856e-f3a7-42e9-8244-95422ef31321 #The Breeders
39
+ http://www.guardian.co.uk/music/foxybrown: c0235fec-bb90-4976-8da1-5d1b37e158ec #Foxy Brown & Jay-Z
40
+ http://www.guardian.co.uk/music/ianbrown: 182d5d8a-94c7-4ef4-8222-a1838353a37b #Ian Brown
41
+ http://www.guardian.co.uk/music/buzzcocks: 31e9c35b-2675-4632-8596-f9bd9286f6c8 #Buzzcocks
42
+ http://www.guardian.co.uk/music/camdencrawl: b1dffc4b-0b99-4037-a726-0a111e0ca812 #Camden
43
+ http://www.guardian.co.uk/music/mariahcarey: 494e8d09-f85b-4543-892f-a5096aed1cd4 #Mariah Carey
44
+ http://www.guardian.co.uk/music/johnnycash: d43d12a1-2dc9-4257-a2fd-0a3bb1081b86 #Johnny Cash
45
+ http://www.guardian.co.uk/music/charlatans: 8434409e-baa9-4e12-b4aa-566a91c7d7cf #The Charlatans
46
+ http://www.guardian.co.uk/music/raycharles: 2ce02909-598b-44ef-a456-151ba0a3bd70 #Ray Charles
47
+ http://www.guardian.co.uk/music/clapyourhandssayyeah: 4b2d6a23-034d-4a29-9bb9-d2462796da4e #Clap Your Hands Say Yeah
48
+ http://www.guardian.co.uk/music/ericclapton: 618b6900-0618-4f1e-b835-bccb17f84294 #Eric Clapton
49
+ http://www.guardian.co.uk/music/clash: 8f92558c-2baa-4758-8c38-615519e9deda #The Clash
50
+ http://www.guardian.co.uk/music/jimmycliff: 2caa54a7-b08c-41da-b892-3a41abe778be #Jimmy Cliff
51
+ http://www.guardian.co.uk/music/clinic: 1ea2c08f-323c-4207-9af1-e2fb8588f08e #Clinic
52
+ http://www.guardian.co.uk/music/georgeclinton: 7e22c4e9-1355-48d3-af58-03347e95b5f3 #George S. Clinton
53
+ http://www.guardian.co.uk/music/clubs: a3751fb8-b8cc-4b43-a1cb-3cb39a074402 #Clubbing
54
+ http://www.guardian.co.uk/music/coldplay: cc197bad-dc9c-440d-a5b5-d52ba2e14234 #Coldplay
55
+ http://www.guardian.co.uk/music/alicecooper: 4d7928cd-7ed2-4282-8c29-c0c9f966f1bd #Alice Cooper
56
+ http://www.guardian.co.uk/music/elviscostello: 8a338e06-d182-46f2-bd16-30a09bc840ba #Elvis Costello
57
+ http://www.guardian.co.uk/music/cribs: a3a92047-be1c-4f3e-8960-c4f8570984df #The Cribs
58
+ http://www.guardian.co.uk/music/crosbystillsnashandyoung: 46a782ea-4308-476b-abd1-a91b197f3037 #Crosby, Stills, Nash & Young
59
+ http://www.guardian.co.uk/music/cypresshill: 51508c1f-8d07-4a00-9cf1-26c570fe7b78 #Cypress Hill
60
+ http://www.guardian.co.uk/music/dangermouse: 4b356f05-bcc2-4544-925b-fd9a1bf708be #Danger Mouse
61
+ http://www.guardian.co.uk/music/raydavies: a0ecce6d-7dfd-4e3c-9d1e-26465244450d #Ray Davies & His Funky Trumpet
62
+ http://www.guardian.co.uk/music/deathcabforcutie: 0039c7ae-e1a7-4a7d-9b49-0cbc716821a6 #Death Cab for Cutie
63
+ http://www.guardian.co.uk/music/defleppard: 7249b899-8db8-43e7-9e6e-22f1e736024e #Def Leppard
64
+ http://www.guardian.co.uk/music/devo: 4d8a5bce-7f33-4fd2-bf8f-e8f6cf467373 #Devo Springsteen
65
+ http://www.guardian.co.uk/music/dinosaurjr: 77c167d2-4965-4421-830a-9815e4956475 #Dinosaur Jr.
66
+ http://www.guardian.co.uk/music/direstraits: 614e3804-7d34-41ba-857f-811bad7c2b7a #Dire Straits
67
+ http://www.guardian.co.uk/music/dirtyprettythings: 648615ca-ca74-460d-928a-2bae67ae6d14 #Dirty Pretty Things
68
+ http://www.guardian.co.uk/music/dizzeerascal: 1a99cc88-aea3-4fe3-96b9-20791667f65f #Dizzee Rascal
69
+ http://www.guardian.co.uk/music/petedoherty: ff041743-15eb-49db-933f-8bc66a4a3235 #Pete Doherty
70
+ http://www.guardian.co.uk/music/doors: 9efff43b-3b29-4082-824e-bc82f646f93d #The Doors
71
+ http://www.guardian.co.uk/music/drdre: 5f6ab597-f57a-40da-be9e-adad48708203 #Dr. Dre
72
+ http://www.guardian.co.uk/music/duranduran: 2f1baa8d-aad7-4bf5-b5f2-ec857e20129a #Andy Durán
73
+ http://www.guardian.co.uk/music/bobdylan: 72c536dc-7137-4477-a521-567eeb840fa8 #Bob Dylan
74
+ http://www.guardian.co.uk/music/eagles: f46bd570-5768-462e-b84c-c7c993bbf47e #Eagles
75
+ http://www.guardian.co.uk/music/echoandthebunnymen: ccd4879c-5e88-4385-b131-bf65296bf245 #Echo & The Bunnymen
76
+ http://www.guardian.co.uk/music/eels: 14387b0f-765c-4852-852f-135335790466 #Eels
77
+ http://www.guardian.co.uk/music/elbow: 3cb3928a-526c-4a3d-93c5-53315fa9bde0 #Elbow
78
+ http://www.guardian.co.uk/music/missyelliott: a0b8cb9e-7532-45fe-a74c-30e7c4009a39 #Missy Elliott
79
+ http://www.guardian.co.uk/music/elvis25yearson: 6bd50a1f-c6b6-49ab-97b5-87cc5430f16c #17 Years
80
+ http://www.guardian.co.uk/music/eminem: b95ce3ff-3d05-4e87-9e01-c97b66af13d4 #Eminem
81
+ http://www.guardian.co.uk/music/brianeno: ff95eb47-41c4-4f7f-a104-cdc30f02e872 #Brian Eno
82
+ http://www.guardian.co.uk/music/falloutboy: 516cef4d-0718-4007-9939-f9b38af3f784 #Fall Out Boy
83
+ http://www.guardian.co.uk/music/fall: d5da1841-9bc8-4813-9f89-11098090148e #The Fall
84
+ http://www.guardian.co.uk/music/bryanferry: 4ef7a9e2-2cf5-483a-8616-ef7791a98026 #Bryan Ferry
85
+ http://www.guardian.co.uk/music/foals: 6a65d878-fcd0-42cf-aff9-ca1d636a8bcc #Foals
86
+ http://www.guardian.co.uk/music/foofighters: 67f66c07-6e61-4026-ade5-7e782fad3a5d #Foo Fighters
87
+ http://www.guardian.co.uk/music/franzferdinand: aa7a2827-f74b-473c-bd79-03d065835cf7 #Franz Ferdinand
88
+ http://www.guardian.co.uk/music/futureheads: 0cf9d983-aecf-4d57-9e94-fde2beac014f #The Futureheads
89
+ http://www.guardian.co.uk/music/gallows: 4cb159df-bd09-4f42-bfec-09b6a248a52f #Gallows
90
+ http://www.guardian.co.uk/music/gangoffour: d8661c02-f423-4d72-8044-40ff05daf7a1 #Gang of Four
91
+ http://www.guardian.co.uk/music/georgeharrison: 42a8f507-8412-4611-854f-926571049fa0 #George Harrison
92
+ http://www.guardian.co.uk/music/georgemichael: ccb8f30e-4d71-40c4-8b1d-846dafe73e2c #George Michael
93
+ http://www.guardian.co.uk/music/girlsaloud: a0b2f210-cd3a-453d-937d-e4f2658d17c7 #Girls Aloud
94
+ http://www.guardian.co.uk/music/gnarlsbarkley: a47c3aa2-7d87-475c-a2c7-1e2047dafb09 #Gnarls Barkley
95
+ http://www.guardian.co.uk/music/gorillaz: e21857d5-3256-4547-afb3-4b6ded592596 #Gorillaz
96
+ http://www.guardian.co.uk/music/greenday: 084308bd-1654-436f-ba03-df6697104e19 #Green Day
97
+ http://www.guardian.co.uk/music/grinderman: 611f1230-7c2b-4610-b9e6-5a4fcb6e3cc7 #Grinderman
98
+ http://www.guardian.co.uk/music/groovearmada: 35723b60-732e-4bd8-957f-320b416e7b7f #Groove Armada
99
+ http://www.guardian.co.uk/music/gunsnroses: eeb1195b-f213-4ce1-b28c-8565211f8e43 #Guns N' Roses
100
+ http://www.guardian.co.uk/music/pjharvey: e795e03d-b5d5-4a5f-834d-162cfb308a2c #PJ Harvey
101
+ http://www.guardian.co.uk/music/isaachayes: a2361c7d-ddb6-41c1-a9a0-be09fbbb8d21 #Isaac Hayes
102
+ http://www.guardian.co.uk/music/icecube: b1274489-1832-4fd9-a7b6-9414d0b05f62 #Dr. Dre & Ice Cube
103
+ http://www.guardian.co.uk/music/indigogirls: 00c49f40-d715-4b79-b223-432048602cce #Indigo Girls
104
+ http://www.guardian.co.uk/music/inxs: 481bf5f9-2e7c-4c44-b08a-05b32bc7c00d #INXS
105
+ http://www.guardian.co.uk/music/boniver: 437a0e49-c6ae-42f6-a6c1-84f25ed366bc #Bon Iver
106
+ http://www.guardian.co.uk/music/michaeljackson: f27ec8db-af05-4f36-916e-3d57f91ecf5e #Michael Jackson
107
+ http://www.guardian.co.uk/music/jam: 23228f18-01d5-493e-94ce-cfcde82a8db2 #The Jam
108
+ http://www.guardian.co.uk/music/jayz: f82bcf78-5b69-4622-a5ef-73800768d9ac #Jay-Z
109
+ http://www.guardian.co.uk/music/jesusandmarychain: e938a15c-b17e-4e7a-9f68-ff0d536cab44 #The Jesus and Mary Chain
110
+ http://www.guardian.co.uk/music/johnfordham: fc3d0f8f-236e-4012-afc3-3002c4d2530c #Debra Fordham
111
+ http://www.guardian.co.uk/music/johnlennon: 4d5447d7-c61c-4120-ba1b-d7f471d385b9 #John Lennon
112
+ http://www.guardian.co.uk/music/joydivision: 9a58fda3-f4ed-4080-a3a5-f457aac9fcdd #Joy Division
113
+ http://www.guardian.co.uk/music/kaiserchiefs: 90218af4-4d58-4821-8d41-2ee295ebbe21 #Kaiser Chiefs
114
+ http://www.guardian.co.uk/music/kasabian: 69b39eab-6577-46a4-a9f5-817839092033 #Kasabian
115
+ http://www.guardian.co.uk/music/kelis: 1239d1bc-cc09-43e0-bcd0-374f60346138 #Kelis
116
+ http://www.guardian.co.uk/music/aliciakeys: e92aa52d-bb07-4ed7-bcca-2c3f7e93d863 #Usher & Alicia Keys
117
+ http://www.guardian.co.uk/music/rilokiley: eaf6a7ca-105d-4a94-ba02-8c3e4040319a #Rilo Kiley
118
+ http://www.guardian.co.uk/music/killers: 95e1ead9-4d31-4808-a7ac-32c3614c116b #The Killers
119
+ http://www.guardian.co.uk/music/kingsofconvenience: cf0f4547-ffbd-4011-98ad-0bec9ba022db #Kings of Convenience
120
+ http://www.guardian.co.uk/music/kingsofleon: 6ffb8ea9-2370-44d8-b678-e9237bbd347b #Kings of Leon
121
+ http://www.guardian.co.uk/music/kinks: 17b53d9f-5c63-4a09-a593-dde4608e0db9 #The Kinks
122
+ http://www.guardian.co.uk/music/kiss: 98b67ebc-5606-4cdb-9787-47b12cceb101 #Kiss
123
+ http://www.guardian.co.uk/music/alisonkrauss: 6b064ead-91a4-4ac8-8076-b1febe4f4aac #Alison Krauss
124
+ http://www.guardian.co.uk/music/ledzeppelin: 678d88b2-87b0-403b-b63d-5da7465aecc3 #Led Zeppelin
125
+ http://www.guardian.co.uk/music/johnlegend: 75a72702-a5ef-4513-bca5-c5b944903546 #John Legend
126
+ http://www.guardian.co.uk/music/leonalewis: 8d552dfc-648f-401f-90de-e925013ca537 #Leona Lewis
127
+ http://www.guardian.co.uk/music/libertines: 82b304c0-7da4-45d3-896a-0767c7ae1141 #The Libertines
128
+ http://www.guardian.co.uk/music/llcoolj: a4dd0e77-83b8-4e92-89b7-effb0e47fd8c #LL Cool J
129
+ http://www.guardian.co.uk/music/love: 34ec9a8d-c65b-48fd-bcdd-aad2f72fdb47 #Love
130
+ http://www.guardian.co.uk/music/courtney: 31d2041c-985d-48f7-b6e2-2a70cdf14853 #Courtney Love
131
+ http://www.guardian.co.uk/music/nicklowe: a3d5a6bf-c86d-44d3-969b-f345620009c6 #Nick Lowe
132
+ http://www.guardian.co.uk/music/luna: 107ed89a-88ff-4a90-8b75-2619dc7ba950 #Luna-C
133
+ http://www.guardian.co.uk/music/madonna: 79239441-bfd5-4981-a70c-55c3f15c1287 #Madonna
134
+ http://www.guardian.co.uk/music/bobmarley: c33627c6-ef0d-49de-9ef0-c4804190040f #Bob Marley
135
+ http://www.guardian.co.uk/music/johnnymarr: d6ee4ecc-ed0a-42f3-a3c8-d943bd63744f #Johnny Marr + The Healers
136
+ http://www.guardian.co.uk/music/maximopark: 92e634a7-6023-4be8-be15-ebba822f5b34 #Maxïmo Park
137
+ http://www.guardian.co.uk/music/paulmccartney: cd7a47b2-bdcb-413f-a939-7d8d55bd6fc3 #Paul McCartney & Linda McCartney
138
+ http://www.guardian.co.uk/music/metallica: 65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab #Metallica
139
+ http://www.guardian.co.uk/music/mia: 5f6be871-eb98-42f1-bce4-5a3d8212c281 #M.I.A.
140
+ http://www.guardian.co.uk/music/michaeljacksontrial: f27ec8db-af05-4f36-916e-3d57f91ecf5e #Michael Jackson
141
+ http://www.guardian.co.uk/music/kylieminogue: 2fddb92d-24b2-46a5-bf28-3aed46f4684c #Kylie Minogue
142
+ http://www.guardian.co.uk/music/jonimitchell: a6de8ef9-b1a1-4756-97aa-481bbb8a4069 #Joni Mitchell
143
+ http://www.guardian.co.uk/music/modestmouse: a96ac800-bfcb-412a-8a63-0a98df600700 #Modest Mouse
144
+ http://www.guardian.co.uk/music/vanmorrison: a41ac10f-0a56-4672-9161-b83f9b223559 #Van Morrison
145
+ http://www.guardian.co.uk/music/morrissey: 013fa897-86db-41d3-8e9f-386c8a34f4e6 #Morrissey
146
+ http://www.guardian.co.uk/music/mozart: b972f589-fb0e-474e-b64a-803b0364fa75 #Wolfgang Amadeus Mozart
147
+ http://www.guardian.co.uk/music/mudhoney: e675295a-1efe-4247-aa3b-53b78d0cdffc #Mudhoney
148
+ http://www.guardian.co.uk/music/muse: 9c9f1380-2516-4fc9-a3e6-f9f61941d090 #Muse
149
+ http://www.guardian.co.uk/music/netmusic: a10ffaf2-0d3f-4d89-bc42-914c36e86f02 #Internet
150
+ http://www.guardian.co.uk/music/roxymusic: 331ce348-1b08-40b9-8ed7-0763b92bd003 #Roxy Music
151
+ http://www.guardian.co.uk/music/mychemicalromance: c07f0676-9143-4217-8a9f-4c26bd636f13 #My Chemical Romance
152
+ http://www.guardian.co.uk/music/neptunes: cc85e0b6-e953-4602-be9c-8c4218e417de #Neptunes
153
+ http://www.guardian.co.uk/music/nerd: 3fb49f5a-fdc0-4789-9c84-22b38b3f3cb5 #N.E.R.D.
154
+ http://www.guardian.co.uk/music/neworder: af1995e4-16a1-4c05-9ac9-082edeb3a099 #Kylie Minogue vs. New Order
155
+ http://www.guardian.co.uk/music/stevienicks: 4b1bf05d-0e3c-44a3-9fc8-ad088fc25797 #Buckingham Nicks
156
+ http://www.guardian.co.uk/music/nineinchnails: b7ffd2af-418f-4be2-bdd1-22f8b48613da #Nine Inch Nails
157
+ http://www.guardian.co.uk/music/nirvana: 5b11f4ce-a62d-471e-81fc-a69a8278c7da #Nirvana
158
+ http://www.guardian.co.uk/music/nodoubt: fbd2a255-1d57-4d31-ac11-65b671c19958 #No Doubt
159
+ http://www.guardian.co.uk/music/garynuman: 6cb79cb2-9087-44d4-828b-5c6fdff2c957 #Gary Numan
160
+ http://www.guardian.co.uk/music/nwa: 3a54bffa-2314-44a2-927b-60144119c780 #N.W.A
161
+ http://www.guardian.co.uk/music/oasis: 39ab1aed-75e0-4140-bd47-540276886b60 #Oasis
162
+ http://www.guardian.co.uk/music/oldirtybastard: d50548a0-3cfd-4d7a-964b-0aef6545d819 #Ol' Dirty Bastard
163
+ http://www.guardian.co.uk/music/orbital: f3e2a7d9-c6bb-4848-95e5-04c0a1e2f511 #Orbital
164
+ http://www.guardian.co.uk/music/ozzyosbourne: 8aa5b65a-5b3c-4029-92bf-47a544356934 #Ozzy Osbourne
165
+ http://www.guardian.co.uk/music/outkast: 73fdb566-a9b1-494c-9f32-51768ec9fd27 #OutKast
166
+ http://www.guardian.co.uk/music/panicatthedisco: b9472588-93f3-4922-a1a2-74082cdf9ce8 #Panic at the Disco
167
+ http://www.guardian.co.uk/music/gramparsons: cbc83fb1-2c23-4ad1-9187-594b30da3f21 #Gram Parsons
168
+ http://www.guardian.co.uk/music/peaches: 270acfee-1fbe-413e-a0a8-6a35a8b3b66c #Peaches
169
+ http://www.guardian.co.uk/music/pearljam: 83b9cbe7-9857-49e2-ab8e-b57b01038103 #Pearl Jam
170
+ http://www.guardian.co.uk/music/johnpeel: 65ddc5dd-6e1c-4f70-bee3-b67703bbf4c8 #John Peel
171
+ http://www.guardian.co.uk/music/petshopboys: be540c02-7898-4b79-9acc-c8122c7d9e83 #Pet Shop Boys
172
+ http://www.guardian.co.uk/music/tompetty: f93dbc64-6f08-4033-bcc7-8a0bb4689849 #Tom Petty and The Heartbreakers
173
+ http://www.guardian.co.uk/music/pinkfloyd: 83d91898-7763-47d7-b03b-b92132375c47 #Pink Floyd
174
+ http://www.guardian.co.uk/music/pixies: b6b2bb8d-54a9-491f-9607-7b546023b433 #Pixies
175
+ http://www.guardian.co.uk/music/iggypop: f37b3f31-b1f8-4b88-8cb5-b34f709b17d7 #Iggy Pop
176
+ http://www.guardian.co.uk/music/portishead: 8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11 #Portishead
177
+ http://www.guardian.co.uk/music/elvispresley: 01809552-4f87-45b0-afff-2c6f0730a3be #Elvis Presley
178
+ http://www.guardian.co.uk/music/primalscream: 55704c38-224f-4b75-b29f-d43653f8bc9a #Primal Scream
179
+ http://www.guardian.co.uk/music/prince: 070d193a-845c-479f-980e-bef15710653e #Prince
180
+ http://www.guardian.co.uk/music/prodigy: 4a4ee089-93b1-4470-af9a-6ff575d32704 #The Prodigy
181
+ http://www.guardian.co.uk/music/pulp: 76b2e842-5e85-4c97-ab62-d5bc315595b5 #Pulp
182
+ http://www.guardian.co.uk/music/queensofthestoneage: 7dc8f5bd-9d0b-4087-9f73-dc164950bbd8 #Queens of the Stone Age
183
+ http://www.guardian.co.uk/music/raconteurs: be407b02-f3e6-4ed5-9489-f8e5f0ab36dc #The Raconteurs
184
+ http://www.guardian.co.uk/music/radiohead: a74b1b7f-71a5-4011-9441-d0b5e4122711 #Radiohead
185
+ http://www.guardian.co.uk/music/ramones: d6ed7887-a401-47a8-893c-34b967444d26 #Ramones
186
+ http://www.guardian.co.uk/music/razorlight: f2cb0435-d643-4fab-9587-fdb0279330a7 #Razorlight
187
+ http://www.guardian.co.uk/music/redhotchilipeppers: 8bfac288-ccc5-448d-9573-c33ea2aa5c30 #Red Hot Chili Peppers
188
+ http://www.guardian.co.uk/music/keithrichards: f0ed72a3-ae8f-4cf7-b51d-2696a2330230 #Keith Richards
189
+ http://www.guardian.co.uk/music/santogold: d7311646-287b-4d3a-9a4f-7d46f93075e5 #Santogold
190
+ http://www.guardian.co.uk/music/sexpistols: e5db18cb-4b1f-496d-a308-548b611090d3 #Sex Pistols
191
+ http://www.guardian.co.uk/music/simonandgarfunkel: 5d02f264-e225-41ff-83f7-d9b1f0b1874a #Simon & Garfunkel
192
+ http://www.guardian.co.uk/music/siouxsieandthebanshees: 78ea5ea1-3c4d-4b7e-ac5d-68900319ebe2 #Siouxsie and The Banshees
193
+ http://www.guardian.co.uk/music/smashingpumpkins: ba0d6274-db14-4ef5-b28d-657ebde1a396 #The Smashing Pumpkins
194
+ http://www.guardian.co.uk/music/elliottsmith: 03ad1736-b7c9-412a-b442-82536d63a5c4 #Elliott Smith
195
+ http://www.guardian.co.uk/music/smiths: 40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27 #The Smiths
196
+ http://www.guardian.co.uk/music/snoopdogg: f90e8b26-9e52-4669-a5c9-e28529c47894 #Snoop Dogg
197
+ http://www.guardian.co.uk/music/samsparro: cd71e6e9-42bb-4a1a-b5ce-17f41682b3e2 #Sam Sparro
198
+ http://www.guardian.co.uk/music/britneyspears: 45a663b5-b1cb-4a91-bff6-2bef7bbfdd76 #Britney Spears
199
+ http://www.guardian.co.uk/music/spicegirls: bf0caafc-2b20-4e07-ab85-87e14ff430ce #Spice Girls
200
+ http://www.guardian.co.uk/music/spiritualized: 65041e06-83d2-4987-ae52-c17a915fc82a #Spiritualized
201
+ http://www.guardian.co.uk/music/springsteen: 70248960-cb53-4ea4-943a-edb18f7d336f #Bruce Springsteen
202
+ http://www.guardian.co.uk/music/gwenstefani: 2e41ae9c-afd2-4f20-8f1e-17281ce9b472 #Gwen Stefani
203
+ http://www.guardian.co.uk/music/sufjanstevens: 01d3c51b-9b98-418a-8d8e-37f6fab59d8c #Sufjan Stevens
204
+ http://www.guardian.co.uk/music/davestewart: 4f8df6e2-33dc-4d05-86d9-2f9641c6f4d7 #Dave Stewart & Barbara Gaskin
205
+ http://www.guardian.co.uk/music/rodstewart: a35237a0-4f47-40a6-b6f3-1e786db23402 #Rod Stewart
206
+ http://www.guardian.co.uk/music/stoneroses: b5fa29f1-6c22-4321-a488-b5f363b06b06 #The Stone Roses
207
+ http://www.guardian.co.uk/music/strokes: 16aacd08-a0f3-46c1-b7ec-f1736f5de60d #The Diff'rent Strokes
208
+ http://www.guardian.co.uk/music/joestrummer: 39c1e474-647e-42ef-a157-fcfb30c2c2ff #Joe Strummer & The Mescaleros
209
+ http://www.guardian.co.uk/music/supergrass: 6386ddff-0d13-4685-9f0a-a82bf022fb1c #Supergrass
210
+ http://www.guardian.co.uk/music/hives: 487bfd74-71bf-46dd-b89c-80b7a0f06f2f #The Hives
211
+ http://www.guardian.co.uk/music/therollingstones: b071f9fa-14b0-4217-8e97-eb41da73f598 #The Rolling Stones
212
+ http://www.guardian.co.uk/music/thespecials: 07eb40a2-2914-439c-a01d-15a685b84ddf #The Specials
213
+ http://www.guardian.co.uk/music/thestreets: 0345b1d2-9017-4a97-848e-d5f7d2ea8de6 #One Way Streets
214
+ http://www.guardian.co.uk/music/who: 9fdaa16b-a6c4-4831-b87c-bc9ca8ce7eaa #The Who
215
+ http://www.guardian.co.uk/music/timbaland: daa09819-5da5-4c7a-8bef-eb372bb27ff1 #Timbaland
216
+ http://www.guardian.co.uk/music/justintimberlake: 596ffa74-3d08-44ef-b113-765d43d12738 #Justin Timberlake
217
+ http://www.guardian.co.uk/music/petetownshend: fb147b8f-0144-4418-acaa-90b2d9779840 #Pete Townshend
218
+ http://www.guardian.co.uk/music/kttunstall: 951d2103-9c7d-4849-ae60-88bf6aa4790b #KT Tunstall
219
+ http://www.guardian.co.uk/music/u2: a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432 #U2
220
+ http://www.guardian.co.uk/music/ub40: 7113aab7-628f-4050-ae49-dbecac110ca8 #UB40
221
+ http://www.guardian.co.uk/music/urban: ca738bcc-b2ce-4dcc-af52-d1654bfd4733 #Urban Cowboyz
222
+ http://www.guardian.co.uk/music/velvetrevolver: 3a528006-1429-47f4-ae9b-2ea95343e16a #Velvet Revolver
223
+ http://www.guardian.co.uk/music/velvetunderground: 94b0fb9d-a066-4823-b2ec-af1d324bcfcf #The Velvet Underground
224
+ http://www.guardian.co.uk/music/verve: d4d17620-fd97-4574-92a8-a2cb7e72ce42 #The Verve
225
+ http://www.guardian.co.uk/music/vines: 4e045c96-538b-46ed-8ea8-7cae20b56574 #The Vines
226
+ http://www.guardian.co.uk/music/marthawainwright: 231475d8-fddb-4d7d-aad9-287e59e4b4ba #Martha Wainwright
227
+ http://www.guardian.co.uk/music/rufuswainwright: 78e46ae5-9bfd-433b-be3f-19e993d67ecc #Rufus Wainwright
228
+ http://www.guardian.co.uk/music/mward: 655b3e5b-09e4-45dd-941c-6fa3fc12521b #M. Ward
229
+ http://www.guardian.co.uk/music/werejammin: 7fa1acb8-4fb9-4947-9040-51a844969834 #Jammin'
230
+ http://www.guardian.co.uk/music/weezer: 6fe07aa5-fec0-4eca-a456-f29bff451b04 #Weezer
231
+ http://www.guardian.co.uk/music/paulweller: ac1749b5-088e-4c42-9c39-7f578ff54f6e #Paul Weller vs. Portishead
232
+ http://www.guardian.co.uk/music/kanyewest: 164f0d73-1234-4e2c-8743-d77bf2191051 #Kanye West
233
+ http://www.guardian.co.uk/music/thewhitestripes: 11ae9fbb-f3d7-4a47-936f-4c0a04d3b3b5 #The White Stripes
234
+ http://www.guardian.co.uk/music/wilco: 9e53f84d-ef44-4c16-9677-5fd4d78cbd7d #Wilco
235
+ http://www.guardian.co.uk/music/pharrellwilliams: 149f91ef-1287-46da-9a8e-87fee02f1471 #Pharrell Williams
236
+ http://www.guardian.co.uk/music/brianwilson: 9b07fae3-4442-4c40-a9e0-78d3e0540901 #Brian Wilson & Van Dyke Parks
237
+ http://www.guardian.co.uk/music/tonywilson: c90528f0-75e7-435f-82e8-dfbdcf8824d3 #Tony Wilson
238
+ http://www.guardian.co.uk/music/amywinehouse: dfe9a7c4-8cf2-47f4-9dcb-d233c2b86ec3 #Amy Winehouse
239
+ http://www.guardian.co.uk/music/patrickwolf: 4ac4e32b-bd18-402e-adad-ae00e72f8d85 #Patrick Wolf
240
+ http://www.guardian.co.uk/music/steviewonder: 1ee18fb3-18a6-4c7f-8ba0-bc41cdd0462e #Stevie Wonder
241
+ http://www.guardian.co.uk/music/wutangclan: 0febdcf7-4e1f-4661-9493-b40427de2c13 #Wu-Tang Clan
242
+ http://www.guardian.co.uk/music/xtc: 97c86b2c-2765-46a2-aef8-76a7e24c430f #XTC
243
+ http://www.guardian.co.uk/music/yeahyeahyeahs: 584c04d2-4acc-491b-8a0a-e63133f4bfc4 #Yeah Yeah Yeahs
244
+ http://www.guardian.co.uk/music/neilyoung: 0f3515b0-75c9-46c9-b26c-4cd05d26eae7 #Neil Young & Crazy Horse
245
+ http://www.guardian.co.uk/music/thezutons: 6290b769-173d-49d1-990e-660a4e333877 #The Zutons
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'open-uri'
4
+ require 'yaml'
5
+
6
+ $: << File.expand_path(File.dirname(__FILE__))
7
+
8
# Associates a web document with identifiers by inspecting its hyperlinks.
#
# A LinkToad is constructed with a +mapping+ hash of URL => identifier (a value
# may also be an array of identifiers). #match fetches a document, collects the
# absolute HTTP(S) links it contains and returns every identifier whose URL
# appears among them, tolerating small spelling differences in the URL
# (trailing slash, leading 'www.', trailing 'index.*' file name).
class LinkToad
  attr_reader :mapping

  # +mapping+ is a hash mapping from a URL to an identifier
  # that should be associated with that URL.
  def initialize(mapping)
    @mapping = mapping
  end

  # Returns the identifiers for the document at the given +url+.
  #
  # Identifiers are found by looking up links in the document in the +mapping+
  # hash. The result is a flat array without duplicates; it is empty when no
  # link in the document is known.
  def match(url)
    links_from_url(url).map { |link| hits_for_uri(link) }.flatten.uniq
  end

  protected

  # Fetches the document at +url+ and returns the unique absolute HTTP/HTTPS
  # link targets found in its <a href="..."> attributes, as strings.
  #
  # Anchors without an href, hrefs that do not parse as a URI, and links with
  # any other scheme (or relative links) are skipped.
  def links_from_url(url)
    # +open+ is deliberately called as a plain instance-level method: the specs
    # stub it on the LinkToad instance. With open-uri loaded it accepts URLs.
    # NOTE(review): Kernel#open treats a string starting with '|' as a command
    # to run, so +url+ must come from a trusted source.
    doc = Hpricot(open(url))
    links = []
    doc.search('//a').each do |anchor|
      href = anchor.attributes['href']
      next if href.nil?
      begin
        uri = URI.parse(href.strip)
      rescue URI::InvalidURIError
        next
      end
      next unless uri.kind_of?(URI::HTTP) || uri.kind_of?(URI::HTTPS)
      links << uri.to_s
    end
    links.uniq
  end

  # Returns the identifiers mapped to +uri+ (a String) or to a close variant
  # of it. Subclasses may override this to recognise URLs that carry the
  # identifier themselves.
  #
  # The variants tried are: with and without a trailing slash, with and
  # without a leading 'www.' on the host, and with a trailing '/index.xxx'
  # file name removed.
  def hits_for_uri(uri)
    # search for gids with both a trailing / and without
    bare = uri.sub(%r[/\z], '')
    candidates = [bare, "#{bare}/"]

    # search for gids with both 'www.' and without; anchored at the start so
    # only the host of this URI is touched (not a URL embedded in the path or
    # query), and the scheme is kept so https links are toggled as well
    toggled =
      if bare =~ %r[\Ahttps?://www\.]
        bare.sub(%r[\A(https?://)www\.], '\1')
      else
        bare.sub(%r[\A(https?://)], '\1www.')
      end
    candidates << toggled << "#{toggled}/"

    # try also without the index.* if it has one; only a final path segment
    # named index.xxx is dropped, never an 'index.' occurring earlier in the URI
    index_file = %r[/index\.\w{3,4}\z]i
    candidates << bare.sub(index_file, '/') if bare =~ index_file

    # mapping values may be single identifiers or arrays of them
    candidates.map { |u| @mapping[u] }.flatten.compact.uniq
  end
end
@@ -0,0 +1,10 @@
1
# A LinkToad that additionally recognises links to artist pages on
# musicbrainz.org and bbc.co.uk/music, whose URLs contain the artist
# identifier itself. Such links need no entry in the mapping.
class MusicArtistsLinkToad < LinkToad
  # Shape of the identifier embedded in artist page URLs: a lower-case
  # UUID (8-4-4-4-12 hexadecimal digits, 36 characters in total).
  ARTIST_GID = '[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'

  # Artist page URLs; capture group 1 is the identifier. Host dots are
  # escaped so that only the literal host names match.
  ARTIST_PAGE_PATTERNS = [
    %r[https?://(?:www\.)?musicbrainz\.org/artist/(#{ARTIST_GID})],
    %r[https?://(?:www\.)?bbc\.co\.uk/music/artists/(#{ARTIST_GID})]
  ]

  protected

  # Returns the identifier embedded in +uri+ when it is a recognised artist
  # page, as a one-element array; otherwise defers to the mapping lookup in
  # LinkToad#hits_for_uri.
  def hits_for_uri(uri)
    ARTIST_PAGE_PATTERNS.each do |pattern|
      return [$1] if uri =~ pattern
    end
    super
  end
end
@@ -0,0 +1,81 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Specs for LinkToad's protected helpers; they are reached through +send+.
describe LinkToad do
  before(:each) do
    @toad = LinkToad.new({})
  end

  # +open+ is stubbed on the toad itself so no network access takes place.
  describe "extracting links" do
    describe "from an empty doc" do
      before(:each) do
        @toad.expects(:open).once.returns('')
      end

      it "should return an empty array of links" do
        @toad.send(:links_from_url, 'http://www.foo.com').should == []
      end
    end

    describe "from a page with one link" do
      before(:each) do
        @toad.expects(:open).once.returns('<a href="http://www.foo.com">foo</a>')
      end

      it "should return that link back" do
        @toad.send(:links_from_url, 'http://www.foo.com').should == [ 'http://www.foo.com' ]
      end
    end
  end

  describe "looking up gids for a uri" do
    before(:each) do
      @toad.mapping.merge!({
        'http://www.coldplay.com/' => 'cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.keanemusic.com/' => 'c7020c6d-cae9-4db3-92a7-e5c561cbad50',
        'http://www.gymclassheroes.com/' => 'f4d4b515-0b74-423f-a161-db184330c37c',
        'http://www.madonna.com/' => '79239441-bfd5-4981-a70c-55c3f15c1287',
        'http://www.oasisinet.com/' => '39ab1aed-75e0-4140-bd47-540276886b60',
        'http://adele.tv/' => 'cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493',
      })
    end

    it "should return Coldplay's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com/').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    it "should return Coldplay's GID without the trailing slash" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    it "should return Coldplay's GID with an index.php URL" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com/index.php').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    # Covers a four-letter index extension on the same host.
    it "should return Coldplay's GID with an index.html URL" do
      @toad.send(:hits_for_uri, 'http://www.coldplay.com/index.html').should == ['cc197bad-dc9c-440d-a5b5-d52ba2e14234']
    end

    # The second expectation checks that the index page match ignores case.
    it "should return Oasis's GID with an index.aspx URL" do
      @toad.send(:hits_for_uri, 'http://www.oasisinet.com/index.aspx').should == ['39ab1aed-75e0-4140-bd47-540276886b60']
      @toad.send(:hits_for_uri, 'http://www.oasisinet.com/Index.aspx').should == ['39ab1aed-75e0-4140-bd47-540276886b60']
    end

    it "should return Madonna's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://www.madonna.com/').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
    end

    it "should return Madonna's GID with URL omitting www." do
      @toad.send(:hits_for_uri, 'http://madonna.com').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
      @toad.send(:hits_for_uri, 'http://madonna.com/').should == ['79239441-bfd5-4981-a70c-55c3f15c1287']
    end

    it "should return Adele's GID with an exact URL match" do
      @toad.send(:hits_for_uri, 'http://adele.tv/').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
    end

    # Here the mapping has no 'www.' and the looked-up URL adds it.
    it "should return Adele's GID with URL omitting www." do
      @toad.send(:hits_for_uri, 'http://www.adele.tv').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
      @toad.send(:hits_for_uri, 'http://www.adele.tv/').should == ['cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493']
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
+ require 'link_toad/music_link_toad'
4
+
5
# Specs for the artist-URL shortcuts in MusicArtistsLinkToad. The toad is
# built with an empty mapping, so every hit must come from the URL itself.
describe MusicArtistsLinkToad do
  before(:each) do
    @toad = MusicArtistsLinkToad.new({})
  end

  describe "looking up gids for a uri" do

    it "should match a BBC Artist URL" do
      expected = [ 'cc197bad-dc9c-440d-a5b5-d52ba2e14234' ]
      # with and without 'www.', with and without a trailing slash
      bbc_uris = [
        'http://bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
        'http://www.bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.bbc.co.uk/music/artists/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
      ]
      bbc_uris.each do |candidate|
        @toad.send(:hits_for_uri, candidate).should == expected
      end
    end

    it "should match a MusicBrainz URL" do
      expected = [ 'cc197bad-dc9c-440d-a5b5-d52ba2e14234' ]
      # with and without 'www.', with and without a trailing slash
      musicbrainz_uris = [
        'http://musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
        'http://www.musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234',
        'http://www.musicbrainz.org/artist/cc197bad-dc9c-440d-a5b5-d52ba2e14234/',
      ]
      musicbrainz_uris.each do |candidate|
        @toad.send(:hits_for_uri, candidate).should == expected
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'link_toad')
2
+ require 'mocha'
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metade-link_toad
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Patrick Sinclair
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-11 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">"
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.0
23
+ version:
24
+ description: LinkToad is a general purpose equivalency engine that uses hyperlinks.
25
+ email: metade@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - README
32
+ files:
33
+ - README
34
+ - lib/link_toad.rb
35
+ - lib/link_toad/music_link_toad.rb
36
+ - examples/musicbrainz/guardian/README
37
+ - examples/musicbrainz/guardian/guardian-music-news.rb
38
+ - examples/musicbrainz/guardian/guardian-musicbrainz.rb
39
+ - examples/musicbrainz/guardian/guardian-musicbrainz.yml
40
+ has_rdoc: true
41
+ homepage: http://github.com/metade/link_toad
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --main
45
+ - README
46
+ - -x
47
+ - example
48
+ - -x
49
+ - spec
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.2.0
68
+ signing_key:
69
+ specification_version: 2
70
+ summary: "LinkToad: hyperlink-powered equivalency engine."
71
+ test_files:
72
+ - spec/spec_helper.rb
73
+ - spec/link_toad_spec.rb
74
+ - spec/music_link_toad_spec.rb