doctor_scrape 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +18 -0
  2. data/.rbenv-version +1 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/Guardfile +10 -0
  6. data/LICENSE +22 -0
  7. data/README.md +78 -0
  8. data/Rakefile +14 -0
  9. data/doctor_scrape.gemspec +35 -0
  10. data/lib/doctor_scrape/data.rb +12 -0
  11. data/lib/doctor_scrape/redirect_follower.rb +29 -0
  12. data/lib/doctor_scrape/scraper/base.rb +50 -0
  13. data/lib/doctor_scrape/scraper/bora.rb +25 -0
  14. data/lib/doctor_scrape/scraper/diva.rb +16 -0
  15. data/lib/doctor_scrape/scraper/duo.rb +32 -0
  16. data/lib/doctor_scrape/scraper/meta.rb +39 -0
  17. data/lib/doctor_scrape/scraper/unknown.rb +20 -0
  18. data/lib/doctor_scrape/search.rb +42 -0
  19. data/lib/doctor_scrape/version.rb +3 -0
  20. data/lib/doctor_scrape.rb +37 -0
  21. data/spec/cassettes/brage_bibsys_no_hibo_handle_URN_NBN_no-bibsys_brage_17854.yml +624 -0
  22. data/spec/cassettes/brage_bibsys_no_nhh_handle_URN_NBN_no-bibsys_brage_22813.yml +535 -0
  23. data/spec/cassettes/brage_bibsys_no_nhh_handle_URN_NBN_no-bibsys_brage_24121.yml +388 -0
  24. data/spec/cassettes/https___bora_hib_no_handle_10049_234.yml +429 -0
  25. data/spec/cassettes/https___bora_hib_no_handle_10049_330.yml +347 -0
  26. data/spec/cassettes/https___bora_uib_no_handle_1956_3282.yml +682 -0
  27. data/spec/cassettes/nora_search.yml +795 -0
  28. data/spec/cassettes/ntnu.diva-portal.org_smash_record.jsf?searchId=1&pid=diva2:122798.yml +398 -0
  29. data/spec/cassettes/ntnu_diva-portal_org_smash_record_jsf_searchId_1_pid_diva2_122794.yml +398 -0
  30. data/spec/cassettes/ntnu_diva-portal_org_smash_record_jsf_searchId_1_pid_diva2_122798.yml +487 -0
  31. data/spec/cassettes/www_duo_uio_no_sok_work_html_WORKID_112975.yml +248 -0
  32. data/spec/cassettes/www_duo_uio_no_sok_work_html_WORKID_149776.yml +240 -0
  33. data/spec/cassettes/www_ub_uit_no_munin_handle_10037_3822.yml +602 -0
  34. data/spec/cassettes/www_ub_uit_no_munin_handle_10037_3826.yml +534 -0
  35. data/spec/data_spec.rb +22 -0
  36. data/spec/parse/bibsys_spec.rb +40 -0
  37. data/spec/parse/bora_hib_spec.rb +29 -0
  38. data/spec/parse/bora_uib_spec.rb +18 -0
  39. data/spec/parse/diva_spec.rb +29 -0
  40. data/spec/parse/duo_spec.rb +29 -0
  41. data/spec/parse/munin_spec.rb +30 -0
  42. data/spec/redirect_follower_spec.rb +37 -0
  43. data/spec/scraper_spec.rb +43 -0
  44. data/spec/scrapers/base_spec.rb +6 -0
  45. data/spec/scrapers/bora_spec.rb +6 -0
  46. data/spec/scrapers/diva_spec.rb +6 -0
  47. data/spec/scrapers/duo_spec.rb +6 -0
  48. data/spec/scrapers/meta_spec.rb +6 -0
  49. data/spec/scrapers/unknown_spec.rb +18 -0
  50. data/spec/search_spec.rb +111 -0
  51. data/spec/shared/scraper.rb +62 -0
  52. data/spec/spec_helper.rb +29 -0
  53. data/spec/support/setup_scraper.rb +8 -0
  54. metadata +292 -0
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ vendor/ruby
data/.rbenv-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.3-p125-perf
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in doctor_scrape.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,10 @@
1
+ # More info at https://github.com/guard/guard#readme
2
+
3
# Guard configuration: decides which specs to re-run when a file changes.
guard('rspec', version: 2) do
  # A changed spec file is matched as-is.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed scraper maps to the spec file of the same name.
  watch(%r{^lib/doctor_scrape/scraper/(.+)\.rb$}) do |match|
    "spec/scrapers/#{match[1]}_spec.rb"
  end

  # Shared examples, the spec helper and any library file map to the whole
  # spec directory.
  [%r{^spec/shared/}, "spec/spec_helper.rb", %r{^lib/.+\.rb$}].each do |pattern|
    watch(pattern) { "spec" }
  end
end
10
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Gudleik Rasch
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,78 @@
1
+ # DoctorScrape
2
+
3
+ Collection of libraries to scrape content from Norwegian doctoral dissertations.
4
+ Used by http://avhandlinger.no
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'doctor_scrape'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install doctor_scrape
19
+
20
+ ## Usage
21
+
22
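+ Call `DoctorScrape::Scraper.for(url)` to get a scraper for a dissertation page, run `scrape` on it (returns `true` on success), then read the result from `data`, or the failures from `errors`.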
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
31
+
32
+ ## Resources
33
+
34
+ ### Supported
35
+
36
+ * Duo: http://www.duo.uio.no/sok/search.html?documentTypes=Doktoravhandling&yearFrom=1917&yearTo=2011
37
+ * Brage/NHH: http://brage.bibsys.no/nhh/browse?type=type&order=ASC&rpp=250&value=Doctoral+thesis
38
+ * DIVA/NTNU: http://ntnu.diva-portal.org/smash/searchlist.jsf?searchId=1
39
+ * MUNIN/UiT: http://www.ub.uit.no/munin/
40
+ * Bora/HiB: https://bora.hib.no/
41
+
42
+ ### Unsupported
43
+
44
+ DE STORE
45
+
46
+ * Bora/UiB: https://bora.uib.no/browse?type=documenttype&order=ASC&rpp=650&value=Doctoral+thesis
47
+ * TEORA: http://teora.hit.no/dspace/ (Merk at avhandlinger ligger under ulike enheter, ikke bare «Doktorgradsavhandlinger - dr. ingeniør».)
48
+
49
+ DE MELLOMSTORE
50
+
51
+ * Brage/BI: http://brage.bibsys.no/bi/browse?type=type&order=ASC&rpp=50&value=Doctoral+thesis
52
+ * Brage/NIH: http://brage.bibsys.no/nih/browse?type=type&order=ASC&rpp=500&value=Doctoral+thesis
53
+ * Brage/UiA: http://brage.bibsys.no/hia/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
54
+ * Brage/UiN: http://brage.bibsys.no/hibo/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
55
+ * Brage/UiS: http://brage.bibsys.no/uis/browse?type=type&order=ASC&rpp=25&value=Doctoral+thesis
56
+
57
+ DE SMÅ
58
+
59
+ * ODA/HiO: https://oda.hio.no/jspui/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
60
+ * Brage/AHO: http://brage.bibsys.no/aho/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
61
+ * Brage/DHS: http://brage.bibsys.no/diakon/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
62
+ * Brage/HSF: http://brage.bibsys.no/hsf/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
63
+ * Brage/HiNT: http://brage.bibsys.no/hint/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
64
+ * Brage/HiNe: http://brage.bibsys.no/hinesna/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
65
+ * Brage/HiL: http://brage.bibsys.no/hil/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
66
+ * Brage/HiST: http://brage.bibsys.no/hist/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
67
+ * Brage/HiL: http://brage.bibsys.no/hil/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
68
+ * Brage/MHS: http://brage.bibsys.no/misjon/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
69
+ * Brage/PHS: http://brage.bibsys.no/politihs/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
70
+ * Brage/NP: http://brage.bibsys.no/npolar/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
71
+ * Brage/HiH: http://brage.bibsys.no/hhe/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
72
+ * Brage/IMR: http://brage.bibsys.no/imr/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
73
+ * Brage/HiØ: http://brage.bibsys.no/hiof/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
74
+ * Brage/NMH: http://brage.bibsys.no/nmh/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
75
+ * Brage/SA: http://brage.bibsys.no/samall/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
76
+ * Brage/UMB: http://brage.bibsys.no/umb/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
77
+ * Brage/SSB: http://brage.bibsys.no/ssb/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
78
+ * Brage/KRUS: http://brage.bibsys.no/krus/browse?type=type&order=ASC&rpp=20&value=Doctoral+thesis
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
# The RSpec task is registered under its default name and is what a bare
# `rake` runs.
desc "run specs"
RSpec::Core::RakeTask.new

task default: :spec

desc "Run test suite and generate coverage report"
task :coverage do
  # NOTE(review): presumably the spec helper enables SimpleCov when this
  # variable is set — confirm against spec/spec_helper.rb.
  ENV.store("SIMPLECOV", "1")
  Rake::Task['spec'].invoke
end
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/doctor_scrape/version', __FILE__)
3
+
4
# Gem specification for doctor_scrape: metadata, packaged files and
# runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name          = "doctor_scrape"
  spec.version       = DoctorScrape::VERSION
  spec.authors       = ["Gudleik Rasch"]
  spec.email         = ["gudleik@gmail.com"]
  spec.description   = "Library for scraping norwegian doctoral dissertations"
  spec.summary       = "Library for scraping norwegian doctoral dissertations"
  spec.homepage      = "https://github.com/Skalar/doctor_scrape"
  spec.require_paths = ["lib"]

  # Package every file tracked by git; executables and test files are
  # derived from that list.
  spec.files       = `git ls-files`.split("\n")
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files  = spec.files.grep(%r{^(test|spec|features)/})

  # Runtime dependencies
  spec.add_dependency "mechanize", ["~> 2.3"]
  # spec.add_dependency "text", ["~> 1.0.3"]

  # Development dependencies, registered in the order listed.
  {
    "rspec"         => "~> 2.8.0",
    "vcr"           => "~> 2.0.0.rc1",
    "webmock"       => "< 1.8",
    "guard"         => "~> 1.0.0",
    "guard-rspec"   => "~> 0.6.0",
    "ruby_gntp"     => "~> 0.3.4",
    "rb-fsevent"    => "~> 0.9.0",
    "pry"           => "~> 0.9.8.2",
    "pry-doc"       => "~> 0.4.0",
    "pry-editline"  => "~> 1.1.1",
    "hirb"          => "~> 0.6.0",
    "awesome_print" => "~> 1.0.2",
    "simplecov"     => "~> 0.6.0"
    # "psych"       => "~> 1.2.2"
  }.each do |name, requirement|
    spec.add_development_dependency name, [requirement]
  end
end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
module DoctorScrape
  # Open-ended container for the values a scraper extracts (title, author,
  # pdf, ...). Behaves like an OpenStruct with two extra helpers.
  class Data < OpenStruct
    # Names of all attributes that currently have a value.
    #
    # @return [Array<Symbol>]
    def fields
      @table.keys
    end

    # Attributes as a plain hash.
    #
    # A copy is returned so that callers modifying the result cannot
    # silently change this object's internal state.
    #
    # @return [Hash{Symbol => Object}]
    def to_hash
      @table.dup
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+ require 'net/https'
3
module DoctorScrape
  # Raised when a URL is still redirecting after the allowed number of requests.
  class TooManyRedirects < StandardError; end

  # Follows HTTP redirects from a starting URL until a non-redirect response
  # is reached.
  class RedirectFollower
    attr_accessor :url

    # @param url [String] the URL to start from
    def initialize(url)
      @url = url
    end

    # Requests the current URL and follows redirects, updating +url+ along
    # the way.
    #
    # @param limit [Integer] maximum number of requests to make
    # @return [String] the last URL reached
    # @raise [TooManyRedirects] if the response is still a redirect after
    #   +limit+ requests (or +limit+ is zero or negative)
    def resolve(limit = 5)
      limit.downto(1) do
        response = Net::HTTP.get_response URI.parse(@url)

        # Redirect-class responses without a Location header (e.g. 304) have
        # nowhere to go, so the current URL is the answer.
        location = response['location'] if response.is_a? Net::HTTPRedirection
        return @url unless location

        # Location may be relative; resolve it against the current URL.
        @url = URI.join(@url, location).to_s
      end

      raise TooManyRedirects
    rescue Net::HTTPBadResponse
      # A malformed response can safely be ignored: keep the last URL we
      # reached instead of returning nil.
      @url
    end

  end
end
@@ -0,0 +1,50 @@
1
+ # encoding: utf-8
2
module DoctorScrape
  module Scraper

    # Common behaviour for all scrapers: fetch a page, let the subclass
    # parse it into +data+, and collect any errors raised on the way.
    #
    # Subclasses implement +parse+ and may override +url+ or +fetch+.
    class Base
      attr_reader :data, :errors, :url

      # @param url [String] address of the page to scrape
      def initialize(url)
        @url    = url
        @errors = [] # so errors? is usable before scrape has run
        @data   = DoctorScrape::Data.new url: @url
      end

      # Fetches and parses the page.
      #
      # StandardErrors raised while fetching or parsing are collected in
      # +errors+; anything more severe (Interrupt, SystemExit, ...) is
      # allowed to propagate.
      #
      # @return [Boolean] true when no errors were recorded
      def scrape
        @errors = []
        begin
          fetch && parse
        rescue => error
          @errors << error
        end
        !errors?
      end

      # Downloads the page (once) and returns the open-uri IO for it.
      #
      # The URL is parsed first so only real URIs are opened: Kernel#open
      # would also run "|command" strings and does not open URLs at all on
      # Ruby 3.
      def fetch
        @body ||= URI.parse(url).open
      end

      # The fetched page as a parsed HTML document.
      def doc
        @doc ||= Nokogiri::HTML @body
      end

      # @return [Boolean] whether any errors have been recorded
      def errors?
        errors.any?
      end

      protected

      # Absolute URL of the href of the first element matching +selector+.
      #
      # @return [String, nil] nil when no element matches or it has no href
      def link(selector)
        element = doc.at(selector)
        href    = element && element['href']
        href ? URI.parse(@url).merge(href).to_s : nil
      end

      def pdf_from_link
        link ".standard:nth-child(1) a"
      end

    end

  end
end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
module DoctorScrape::Scraper

  # Scraper that reads the "dc.*" rows of the item table on the full
  # record view of the page.
  class Bora < Base

    # Copies every "dc.*" table row into +data+, named after the last
    # segment of the key (e.g. "dc.date.issued" is stored as +issued+),
    # then fills in +permalink+ and +pdf+.
    def parse
      doc.search("table.itemDisplayTable tr").each do |row|
        key, value = row.children[0..1].map(&:text)
        next unless key =~ /^dc\./

        # Keys that do not end in a lowercase segment are skipped rather
        # than aborting the whole scrape.
        attribute = key[/\.([a-z]+)$/, 1]
        next unless attribute

        @data.send("#{attribute}=", value)
      end

      @data.permalink = @data.uri
      @data.pdf       = pdf_from_link
    end

    # The page is requested with mode=full appended to the original URL.
    def url
      "#{@url}?mode=full"
    end
  end

end
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
module DoctorScrape::Scraper

  # Meta-tag scraper using the tag names listed in META_NAMES.
  class Diva < Meta

    # Data field => name of the <meta> tag that holds its value.
    META_NAMES = {
      title:     "DC.Title",
      author:    "DC.Creator",
      issued:    "DC.Date",
      permalink: "DC.Identifier.url",
      pdf:       "citation_pdf_url"
    }

    # Copies each configured meta tag into the matching data field.
    def parse
      META_NAMES.each do |field, tag_name|
        @data.send("#{field}=", text(tag_name))
      end
      @data.pdf
    end

  end

end
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
module DoctorScrape::Scraper

  # Scraper that reads a label/value table inside #main_container.
  class Duo < Base

    # Label pattern => data field; the first matching pattern wins.
    FIELDS = [
      [/Tittel/,    :title],
      [/Forfatter/, :author],
      [/Publisert/, :issued],
      [/Permanent/, :permalink]
    ]

    # Walks the table rows, storing the value cell of every row whose label
    # cell matches a known pattern, then picks up the pdf link and abstract.
    def parse
      doc.search("#main_container table:nth-child(2) tr").each do |row|
        cells = row.children.search("td")[0..1]
        label, value = cells.map do |cell|
          cell.text.strip.gsub(/\r\n/, '')
        end

        _pattern, field = FIELDS.find { |pattern, _name| pattern =~ label }
        @data.send("#{field}=", value) if field
      end

      @data.pdf      = link "#main_container table:nth-child(2) + p a"
      @data.abstract = text "#main_container p:last"
    end

    protected

    # Stripped text of the first element matching +selector+, or nil when
    # nothing matches.
    def text(selector)
      node = doc.at(selector)
      node && node.text.strip
    end

  end

end
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
module DoctorScrape::Scraper

  # Scraper that takes its content from the <meta> tags of the HTML document.
  class Meta < Base

    # Fills +data+ from the meta tags; the pdf falls back to the link on the
    # page when no meta tag provides one.
    def parse
      @data.title     = text("DC.title")
      @data.author    = text("DC.creator")
      @data.issued    = text("DCTERMS.issued")
      @data.permalink = doc.xpath("//meta[@scheme='DCTERMS.URI']/@content").text
      @data.pdf       = pdf_from_meta || pdf_from_link
      @data.abstract  = abstract
    end

    protected

    # The longest of the DCTERMS.abstract values, or nil when there is none.
    def abstract
      candidates = meta("DCTERMS.abstract").map { |node| node.text }
      candidates.sort { |left, right| right.size <=> left.size }.first
    end

    # HTML-unescaped content of the named meta tag, with CRLF line breaks
    # replaced by spaces.
    def text(name)
      value = meta(name).text
      return unless value

      CGI.unescapeHTML(value).gsub(/\r\n/, ' ')
    end

    # Content attributes of all meta tags with the given name.
    def meta(name)
      doc.xpath("//meta[@name='#{name}']/@content")
    end

    # First meta content that mentions "pdf" and contains "http".
    def pdf_from_meta
      contents = doc.xpath("//meta[contains(@content, 'pdf')]/@content").map { |node| node.text }
      value    = contents.find { |content| content =~ /http/ }
      # sometimes the value contains Fulltext, doh
      value && value.gsub(/Fulltext /, '')
    end

  end
end
@@ -0,0 +1,20 @@
1
+ # encoding: utf-8
2
module DoctorScrape::Scraper

  # Placeholder scraper: never fetches anything, never succeeds, and always
  # reports the same single error.
  class Unknown < Base

    # @return [Array<String>] a fresh one-element list with the fixed message
    def errors
      Array("Don't know how to scrape this page")
    end

    # Scraping is never attempted.
    def scrape; false; end

    # Nothing is downloaded.
    def fetch; false; end

  end

end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require 'rss'
3
+
4
module DoctorScrape

  # Finds dissertation URLs through external search services.
  class Search
    # Endpoint of the NORA search that returns results as RSS.
    NORA_URL = "http://www.ub.uio.no/nora/result.html"

    class << self
      # Searches NORA for doctoral theses and returns the result links.
      #
      # @param options [Hash]
      # @option options [Integer] :limit   page size (default 50)
      # @option options [Integer] :from    first year (default 2007)
      # @option options [Integer] :to      last year (default: current year)
      # @option options [Boolean] :resolve when truthy, follow redirects and
      #   return a scraper per link instead of the raw links
      # @return [Array] links, or scrapers when :resolve is set; empty when
      #   the feed could not be parsed
      def nora(options={})
        params = {
          "PAGESIZE"      => options[:limit] || 50,
          "FROM"          => options[:from]  || 2007,
          "TO"            => options[:to]    || Time.now.year,
          "SEARCHMODE"    => "TOPIC",
          "DOCUMENTTYPES" => "Doctoral thesis", # form-encoded as Doctoral+thesis
          "RESULTMODE"    => "rss"
        }

        # encode_www_form escapes caller-supplied values; the defaults
        # produce the same query string as before.
        url = "#{NORA_URL}?#{URI.encode_www_form(params)}"

        # URI#open (open-uri) rather than Kernel#open, which no longer
        # opens URLs on Ruby 3.
        feed = URI.parse(url).open { |rss| RSS::Parser.parse rss }
        return [] if feed.nil?

        links = feed.items.map(&:link)
        options[:resolve] ? resolve_scrapers(links) : links
      end

      # Follows redirects for each URL and returns the final URLs.
      def resolve_urls(urls)
        urls.map { |url| RedirectFollower.new(url).resolve }
      end

      # Resolves each URL and returns the matching scraper for it.
      def resolve_scrapers(urls)
        resolve_urls(urls).map { |url| Scraper.for url }
      end

    end
  end

end
@@ -0,0 +1,3 @@
1
module DoctorScrape
  # Gem version; frozen so the constant cannot be modified in place.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+ require 'doctor_scrape/version'
3
+ require 'mechanize'
4
+ require 'nokogiri'
5
+ require 'ostruct'
6
+ require 'open-uri'
7
+
8
module DoctorScrape
  autoload :Data,             'doctor_scrape/data'
  autoload :Search,           'doctor_scrape/search'
  autoload :RedirectFollower, 'doctor_scrape/redirect_follower'

  module Scraper
    autoload :Base,    'doctor_scrape/scraper/base'
    autoload :Bora,    'doctor_scrape/scraper/bora'
    autoload :Duo,     'doctor_scrape/scraper/duo'
    autoload :Diva,    'doctor_scrape/scraper/diva'
    autoload :Meta,    'doctor_scrape/scraper/meta'
    autoload :Unknown, 'doctor_scrape/scraper/unknown'

    # Picks the scraper class for a URL based on its host/path.
    #
    # Dots in host names are escaped so they only match a literal ".", and
    # \A anchors at the start of the string rather than of any line.
    #
    # TODO: move this logic into the scraper classes
    #
    # @param url [String, nil]
    # @return [Base] a scraper for +url+; an Unknown scraper when no
    #   pattern matches
    def self.for(url)
      case url
      when %r{\Ahttp://(www\.)?duo\.uio\.no}                  then Duo.new url
      when %r{\Ahttps?://bora\.hib\.no}                       then Bora.new url
      when %r{\Ahttp://.+bibsys\.no},
           %r{idtjeneste\.nb\.no/URN:NBN:no-bibsys_brage}     then Meta.new url
      when %r{\Ahttp://ntnu\.diva-portal\.org/smash/record\.jsf},
           %r{urn=urn:nbn:no:ntnu:diva}                       then Diva.new url
      else
        Unknown.new url
      end
    end
  end

end