math_metadata_lookup 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +69 -0
- data/Rakefile +37 -0
- data/TODO +15 -0
- data/bin/math_metadata_lookup +134 -0
- data/lib/math_metadata_lookup.rb +28 -0
- data/lib/math_metadata_lookup/article.rb +150 -0
- data/lib/math_metadata_lookup/author.rb +50 -0
- data/lib/math_metadata_lookup/entity.rb +39 -0
- data/lib/math_metadata_lookup/lookup.rb +85 -0
- data/lib/math_metadata_lookup/reference.rb +122 -0
- data/lib/math_metadata_lookup/result.rb +97 -0
- data/lib/math_metadata_lookup/site.rb +221 -0
- data/lib/math_metadata_lookup/sites/mr.rb +67 -0
- data/lib/math_metadata_lookup/sites/zbl.rb +97 -0
- data/lib/math_metadata_lookup/tools.rb +110 -0
- data/math_metadata_lookup.gemspec +29 -0
- metadata +108 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
module MathMetadata
|
2
|
+
|
3
|
+
# Generic metadata record: a thin wrapper around a Hash with symbol keys.
#
# Every key is reachable through <tt>entity[:key]</tt> / <tt>entity[:key] = v</tt>
# and, via +method_missing+, through <tt>entity.key</tt> / <tt>entity.key = v</tt>.
class Entity

  # meta:: initial metadata (Hash or nil). The data is copied, so the
  #        caller's Hash is never modified. Keys are converted to symbols
  #        because #[] and #[]= always look up <tt>key.to_sym</tt>; without
  #        that conversion string-keyed input could never be read back.
  def initialize( meta={} )
    @metadata = {}
    (meta || {}).each { |key, value| @metadata[key.to_sym] = value }
  end

  # Attribute-style access to the metadata:
  # * <tt>entity.name = value</tt> stores +value+ under <tt>:name</tt>
  # * any other unknown message reads the key of the same name and
  #   returns nil when it is not set.
  #
  # NOTE: respond_to_missing? is intentionally not overridden. A catch-all
  # "true" would make serialisers that probe for hooks with respond_to?
  # (YAML's encode_with, Marshal's marshal_dump) treat every entity as
  # custom-serialisable.
  def method_missing( meth, *args )
    # Anchored so that only real setter names (word characters followed by
    # "=") are treated as writes; operator-like names fall through to a read.
    if meth.to_s =~ /\A(\w+)=\z/
      self[$1] = args.first
    else
      self[meth]
    end
  end

  # Returns the value stored under +key+ (String or Symbol), or nil.
  def [](key)
    @metadata[key.to_sym]
  end

  # Stores +value+ under +key+ (String or Symbol).
  def []=(key, value)
    @metadata[key.to_sym] = value
  end

  # Returns the entity in the requested format.
  #
  # f:: <tt>:text</tt>, <tt>:html</tt> or <tt>:xml</tt> dispatch to
  #     +to_text+ / +to_html+ / +to_xml+; any other value (including the
  #     default <tt>:ruby</tt>) returns the entity itself.
  def format( f=:ruby )
    case f.to_sym
    when :text, :html, :xml
      send("to_#{f}")
    else
      self
    end
  end

end
|
38
|
+
|
39
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Main class for searching through all sites.
#
# Any message the class does not define itself (e.g. +article+, +author+)
# is forwarded to one instance of every selected site class and the
# per-site answers are collected into a Result.
class Lookup
  attr_accessor :options

  # opts:: <tt>:sites</tt> can be <tt>:all</tt> or an array of allowed
  #        site ids ([:mrev, :zbl]); <tt>:verbose</tt> is handed on to
  #        every site instance.
  def initialize( opts={} )
    @options = { :sites => :all, :verbose => true }.merge(opts)
    @sites = []
  end

  # Calls +meth+ with +args+ on each selected site.
  #
  # Returns a Result holding one Hash per site with the keys <tt>:site</tt>,
  # <tt>:name</tt>, <tt>:url</tt> and <tt>:result</tt> (whatever the site
  # returned). When the first argument is a Hash its <tt>:nwords</tt> entry
  # is passed to the site constructor; calling without arguments is allowed.
  def method_missing(meth, *args)
    query = args.first.is_a?(Hash) ? args.first : {}

    entries = selected_sites.map do |klass|
      site = klass.new(:verbose => @options[:verbose], :nwords => query[:nwords])

      entry = {:site => klass::ID, :name => klass::NAME, :url => klass::URL}
      entry[:result] = site.send(meth, *args)
      entry
    end

    Result.new(entries)
  end


  # Tries to decide what the best result for a query is: every site is
  # searched for the article and only the single most similar hit per site
  # is kept.
  #
  # args:: <tt>:title</tt>, <tt>:authors</tt> (array of names),
  #        <tt>:year</tt> and optionally <tt>:threshold</tt> (default 0.6);
  #        hits whose similarity is below the threshold are dropped.
  #
  # Returns the Result of the +article+ query with every non-nil
  # <tt>:result</tt> reduced to zero or one article. The caller's
  # <tt>:authors</tt> array is left untouched.
  def heuristic( args={} )
    opts = {:threshold => 0.6}.merge(args)
    threshold = opts[:threshold].to_f

    # The sites are queried with shortened author names only: the part
    # before the first comma, then the first of two space separated words.
    full_names = Array(args[:authors])
    short_names = full_names.map do |name|
      head = name.to_s[/([^,]+)/, 1] || name
      head.to_s[/([^ ]+) \S+/, 1] || head
    end
    sites = article(args.merge(:authors => short_names, :nwords => 2))

    # query article has to contain full names
    query_article = Article.new( {:title => args[:title].to_s, :authors => full_names, :year => args[:year]} )
    sites.each do |site|
      next if site[:result].nil?

      candidates = site[:result].to_a
      candidates.each do |candidate|
        next if candidate[:title].to_s.empty?
        candidate[:similarity] = query_article.similarity(candidate)
      end

      good = candidates.reject { |c| c[:similarity].to_f < threshold }
      site[:result] = good.empty? ? good : [good.max_by { |c| c[:similarity].to_f }]
    end

    sites
  end


  # Parses a reference string and runs #heuristic with the title, authors
  # and year found in it.
  #
  # args:: <tt>:reference</tt> (the string to parse), <tt>:verbose</tt>
  #        (pretty-print the parsed reference) and any #heuristic option.
  #
  # Returns an empty Result when the reference yields no article (for
  # example an empty string), otherwise the result of #heuristic.
  def reference( args={} )
    ref = Reference.new args[:reference]
    pp ref if args[:verbose]

    parsed = ref.article
    return Result.new unless parsed

    opts = {:threshold => 0.6}.merge(args)
    opts[:title] = parsed[:title]
    opts[:authors] = parsed[:authors]
    opts[:year] = parsed[:year]

    heuristic opts
  end

  private

  # Site classes allowed by <tt>@options[:sites]</tt>.
  def selected_sites
    return SITES.dup if @options[:sites] == :all

    allowed = [@options[:sites]].flatten
    SITES.select { |klass| allowed.include?(klass::ID) }
  end

end # Lookup
|
84
|
+
|
85
|
+
end # module
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# A single bibliographic reference: the raw reference string plus the
# Article parsed from it.
class Reference

  # The patterns below are tried in numeric order by Reference.parse;
  # the first one that matches wins.

  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_1_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*\((\d{4})\)\s*,\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publication, 4=range, 5=publisher, 6=place, 7=year
  ARTICLE_REFERENCE_2_RE = %r{([^:]+):\s*(.*?),\s*(.*?,\s*[^,]+,\s*[^,]+,\s*[^,]+),\s*pp\.\s*([^,]+?),\s*([^,]+),\s*(.*?),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=range, 4=publication, 5=place, 6=year
  ARTICLE_REFERENCE_3_RE = %r{([^:]+):\s*(.*?),\s*pp\.\s*([^,]+?),\s*([^,]+),\s*(.*?),\s*(\d{4})}mi
  # 1=authors, 2=title, 3=publication, 4=publisher, 5=place, 6=year
  ARTICLE_REFERENCE_4_RE = %r{([^:]+):\s*(.*?),\s*(.*?),\s*([^,]+),\s*([^,]+),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place, 5=year
  ARTICLE_REFERENCE_5_RE = %r{([^:]+):\s*(.*?),\s*(.*?),\s*([^,]+),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place, 5=year
  ARTICLE_REFERENCE_6_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*([^,]+),\s*(\d{4})\s*}mi
  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_7_RE = %r{([^:]+):\s*(.*),\s*(.*?,\s*\d+)\s*\((\d{4})\),\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_8_RE = %r{([^:]+):\s*(.*),\s*(.*?)\s*\((\d{4})\),\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place
  ARTICLE_REFERENCE_9_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*(.*)}mi
  # 1=authors, 2=title, 3=publication
  ARTICLE_REFERENCE_10_RE = %r{([^:]+):\s*(.*?),\s*(.*?)\s*.*?}mi
  # 1=authors, 2=title, 3=place, 4=year
  ARTICLE_REFERENCE_11_RE = %r{([^:]+):\s*(.*),(.*?)\s+(\d{4})}mi


  attr_accessor :source, :article, :suffix, :number, :reg

  # str:: either an Article (stored as is, no source text) or the
  #       reference text, which is parsed unless it is empty.
  # i::   position of the reference in its list; stored in +number+.
  #
  # NOTE(review): Reference.parse also returns the number of the matching
  # pattern, which is dropped here, and +reg+ is never assigned in this
  # class - possibly the two were meant to be connected; confirm.
  def initialize( str=nil, i=1 )
    @number = i
    if str.kind_of?(Article)
      @source = @suffix = nil
      @article = str
    else
      @source = str
      @article, @suffix = Reference.parse(str) unless str.to_s.empty?
    end
  end


  # Parses the reference text +str+ with the ARTICLE_REFERENCE_*_RE
  # patterns.
  #
  # Returns <tt>[article, suffix, rnumber]</tt>: an Article with the keys
  # :authors (always an Array), :title, :publication, :year, :range, :id,
  # :place and :publisher (nil where the pattern has no such part),
  # +suffix+ (always nil here) and the number of the pattern that matched
  # (0 when none did).
  def self.parse( str )
    article = Article.new
    rnumber = 0
    suffix = nil
    # order: authors, title, publication, year, range, id, place, publisher
    found = []

    (1..11).each do |j|
      m = const_get("ARTICLE_REFERENCE_#{j}_RE").match(str.to_s)
      next unless m

      found =
        case j
        when 1, 7, 8
          [m[1], m[2], m[3], m[4], MathMetadata.normalize_range(m[5]), nil]
        when 2
          [m[1], m[2], m[3], m[7], MathMetadata.normalize_range(m[4]), nil, m[6], m[5]]
        when 3
          [m[1], m[2], m[4], m[6], MathMetadata.normalize_range(m[3]), nil, m[5]]
        when 4
          [m[1], m[2], m[3], m[6], nil, nil, m[5], m[4]]
        when 5, 6
          [m[1], m[2], nil, m[5], nil, nil, m[4], m[3]]
        when 9
          [m[1], m[2], nil, nil, nil, nil, m[4], m[3]]
        when 10
          [m[1], m[2], m[3], nil, nil, nil, nil, nil]
        when 11
          [m[1], m[2], nil, m[4], nil, nil, m[3]]
        end
      rnumber = j
      break
    end

    [:authors, :title, :publication, :year, :range, :id, :place, :publisher].each_with_index do |key, idx|
      article[key] = found[idx]
    end
    article.authors = Reference.split_authors article.authors

    [article, suffix, rnumber]
  end


  # Splits an author list into single names.
  #
  # The text is split on ";", then on the words "and" / "und" / "et", then
  # into "Surname, Rest" groups; blank pieces are dropped. The conjunction
  # has to start at a word boundary, so names that merely contain it
  # ("Bertrand", "Roland") stay intact.
  #
  # str:: author list (nil gives an empty Array)
  def self.split_authors( str )
    separators = [
      /;\s*/,
      /,?\s*\b(?:and|und|et)\s+/,
      /(\S+,\s*[^,]+),?\s*/
    ]

    authors = separators.inject([str]) do |parts, re|
      parts.map { |part| part.to_s.split(re) }.flatten
    end

    authors.reject { |name| name.strip.empty? }
  end

end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Collection of per-site search results.
#
# The wrapped data is an Array with one Hash per site; the renderers below
# read its <tt>:name</tt>, <tt>:url</tt> and <tt>:result</tt> entries, where
# <tt>:result</tt> is an Array of entities or nil when the site found
# nothing.
class Result
  include Enumerable

  FORMATS = [:ruby, :yaml, :xml, :html, :text]

  # meta:: Array of per-site Hashes (stored as is, not copied)
  def initialize( meta=[] )
    @metadata = meta
  end


  # Appends one per-site Hash.
  def <<(val)
    @metadata << val
  end


  # Yields every per-site Hash; returns an Enumerator when called without
  # a block.
  def each(&block)
    return enum_for(:each) unless block_given?

    @metadata.each(&block)
  end


  # Renders the result by calling <tt>to_#{f}</tt>.
  #
  # f:: one of FORMATS. <tt>:ruby</tt> returns the Result itself unless a
  #     +to_ruby+ method is available (this class does not define one).
  def format( f=:ruby )
    converter = "to_#{f}"
    return self if f.to_s == "ruby" && !respond_to?(converter)

    send converter
  end


  # HTML rendering: one <div class="site"> per site followed by the
  # +to_html+ output of each of its entities. A site without results is
  # rendered as an empty block.
  def to_html
    result = ""
    @metadata.each do |site|
      result << %(\n<div class="site">\n<h3>Site: #{site[:name]}</h3>)
      entities_of(site).each do |entity|
        result << entity.to_html
      end
      result << "</div>"
    end
    result
  end


  # XML rendering: an <mml> document with one <site> element per site
  # wrapping the +to_xml+ output of each of its entities. A site without
  # results is rendered as an empty element.
  def to_xml
    result = ""

    result << %(<?xml version="1.0" encoding="utf-8"?>\n<mml>)
    @metadata.each do |site|
      result << %(\n<site name="#{site[:name]}">)
      entities_of(site).each do |entity|
        result << entity.to_xml
      end
      result << "\n</site>\n"
    end
    result << "</mml>"

    result
  end


  # YAML dump of the wrapped Array.
  def to_yaml
    @metadata.to_yaml
  end


  # The wrapped Array itself (not a copy).
  def to_array
    @metadata
  end


  # Plain text rendering; sites without results are left out.
  def to_text
    result = ""
    @metadata.each do |site|
      next unless site[:result]
      result << "Site: #{site[:name]}\n"
      result << "URL: #{site[:url]}\n"
      result << "\n"
      site[:result].each do |entity|
        result << entity.to_text
      end
      result << "\n"
    end
    result
  end
  alias :to_str :to_text

  private

  # Entities found by +site+; an empty list when the site returned nil.
  def entities_of( site )
    site[:result] || []
  end

end
|
96
|
+
|
97
|
+
end # MathMetadata
|
@@ -0,0 +1,221 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
require 'htmlentities'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'i18n'
|
7
|
+
require 'cgi'
|
8
|
+
|
9
|
+
|
10
|
+
module MathMetadata
|
11
|
+
|
12
|
+
# Every class that inherits from Site registers itself here.
SITES = []

# Abstract class. Inherit in your sites definition.
#
# The code below reads these constants from the concrete class
# (<tt>self.class::NAME</tt>): AUTHOR_URL, ARTICLE_URL, ARTICLE_ID_URL,
# ARTICLE_ENTRY_RE and, through #method_missing, the <tt>*_RE</tt> regular
# expressions named after the piece of data they extract.
class Site

  # opts:: <tt>:verbose</tt> (default true) prints every fetched URL;
  #        <tt>:nwords</tt>, when set, limits title queries to that many
  #        leading words.
  def initialize( opts={} )
    @options = { :verbose => true }.merge(opts)
  end

  # register new site class
  def self.inherited( site )
    SITES << site
    super
  end


  # Searches for authors.
  #
  # args:: <tt>:name</tt> - the name to look for
  #
  # Returns an Array of Author objects (:id, :preferred, :forms); entries
  # with a blank id are left out.
  def author( args={} )
    opts = {:name => nil}.merge(args)

    found = author_name_forms(opts[:name]).map do |af|
      Author.new({:id => af[1], :preferred => af[0], :forms => af[2]})
    end

    found.reject { |entry| entry[:id].to_s.strip.empty? }
  end


  # Searches for articles.
  #
  # args:: <tt>:id</tt>, or <tt>:title</tt> / <tt>:authors</tt> /
  #        <tt>:year</tt> when no id is given; <tt>:references</tt>
  #        (default true) also collects the references of a single article.
  #
  # Returns an Array of articles, or nil when the page could not be
  # fetched or contains no article with a title.
  def article( args={} )
    opts = {:id => nil, :title => "", :year => "", :authors => [], :references => true}.merge(args)

    page = fetch_article(opts)
    return nil unless page

    articles =
      if list_of_articles?(page)
        get_article_list(page)
      else
        single = get_article(page, opts)
        single[:title].to_s.strip.empty? ? [] : [single]
      end

    articles.empty? ? nil : articles
  end


  protected


  # Generic extractors driven by the regular expressions of the concrete
  # class. +args.first+ is always the page text. NAME below is the
  # upper-cased middle part of the method name.
  #
  # list_of_NAME?(page)::  match position of LIST_OF_NAME_RE in the page,
  #                        or nil
  # get_NAME_m(page, m, n):: for every NAMES_RE match: its first +m+ groups
  #                        (stripped) followed by an Array holding the
  #                        first +n+ groups of every NAME_RE match inside
  #                        group m+1
  # get_NAME_s(page)::     first group of every NAME_RE match inside the
  #                        first group of NAMES_RE
  # get_NAME(page, k)::    stripped groups of NAME_RE; only the first one
  #                        unless +k+ is greater than 1
  #
  # Any other unknown message returns nil.
  def method_missing(meth, *args)
    page = args.first

    case meth.to_s
    when /^list_of_(.*)\?$/
      return page =~ site_constant("LIST_OF_#{$1.upcase}_RE")

    when /^get_(.*)_m$/
      what = $1.upcase
      re = site_constant("#{what}_RE")
      re_s = site_constant("#{what}S_RE")
      m, n = args[1,2]
      m ||= 1
      n ||= 1
      return page.to_s.scan(re_s).map { |match|
        entry = (0...m).map { |i| match[i].to_s.strip }
        forms = []
        match[m].scan(re) { |form| n.times { |i| forms << form[i] } } if match[m]
        entry << forms
      }

    when /^get_(.*)_s$/
      what = $1.upcase
      re = site_constant("#{what}_RE")
      re_s = site_constant("#{what}S_RE")
      # Explicit MatchData: with "page =~ re_s" a nil page would leave $1
      # pointing at the method-name match above.
      container = re_s.match(page.to_s)
      entries = container && container[1]
      return entries.to_s.strip.scan(re).map { |match| match[0].to_s.strip }

    when /^get_(.*)$/
      match = site_constant("#{$1.upcase}_RE").match(page).to_a.map{|x| x.to_s.strip}
      match.shift
      return match.first if args[1].to_i <= 1
      return match
    end
  end


  # Looks up constant +name+ on the concrete site class.
  def site_constant( name )
    self.class.const_get(name)
  end


  # Percent-escapes +str+ for use inside a URL. URI.escape was removed in
  # Ruby 3.0, so the RFC 2396 parser that used to implement it is called
  # directly when the shortcut is missing.
  def uri_escape( str )
    return URI.escape(str) if URI.respond_to?(:escape)

    parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
    parser.escape(str)
  end


  # search for author name forms
  #
  # Returns one <tt>[preferred, id, [forms]]</tt> entry per author found on
  # the author page (see #author for how the positions are used).
  def author_name_forms( name )
    get_author_m fetch_author(name), 2, 1
  end


  # Returns the references of an article page as Reference objects,
  # numbered from 1. Tags are removed and runs of spaces are collapsed
  # before parsing.
  def get_article_references( page )
    get_article_reference_s(page).each_with_index.map do |raw, idx|
      Reference.new(raw.gsub(/<.*?>/,'').gsub(/ +/,' ').strip, idx + 1)
    end
  end


  # Returns the normalized MSC codes of an article page.
  def get_article_msc( page )
    MathMetadata.normalize_mscs(get_article_msc_s(page))
  end


  # Builds an Article from a single-article page.
  #
  # opts:: <tt>:references</tt> - also parse the references of the article
  def get_article( page, opts={} )
    a = Article.new( {
      :id => get_article_id(page),
      :authors => get_article_author_s(page),
      :msc => get_article_msc(page),
      :publication => get_article_publication(page),
      :range => MathMetadata.normalize_range(get_article_range(page)),
      :year => get_article_year(page),
      :keywords => get_article_keyword_s(page),
      :issn => get_article_issn_s(page)
    } )

    a.title, a.language = get_article_title(page, 2)
    a.title = a.title.to_s.gsub(/<\/span>/,'')
    a.references = get_article_references(page) if opts[:references]

    a
  end


  # Fetches every article listed on a result-list page by its id.
  # Entries that cannot be fetched or have no title are skipped.
  def get_article_list( page )
    articles = []
    page.scan(self.class::ARTICLE_ENTRY_RE).each do |match|
      found = article(:id => match[0])
      next unless found # #article returns nil when nothing was found

      a = found.first
      articles << a unless a[:title].to_s.strip.empty?
    end
    articles
  end


  # First <tt>@options[:nwords]</tt> words of +s+, joined by single spaces.
  def nwords(s)
    s.split(" ")[0...@options[:nwords].to_i].join(" ")
  end


  # Downloads +url+ and returns the page text.
  #
  # args:: <tt>:entities</tt> (default true) - decode HTML entities
  def fetch_page( url, args={} )
    opts = {:entities => true}.merge(args)

    puts "fetching #{url}" if @options[:verbose]
    page = URI.parse(url).read
    page = HTMLEntities.decode_entities(page) if page and opts[:entities]

    page
  end


  # Fetches the author search page for +name+.
  def fetch_author( name )
    nn = MathMetadata.normalize_name(name)
    fetch_page(self.class::AUTHOR_URL % uri_escape(nn))
  end


  # Normalizes and escapes every author name and joins them with '; '.
  # nil gives an empty string.
  def join_article_authors( authors )
    Array(authors).map { |author| uri_escape MathMetadata.normalize_name(author) }.join('; ')
  end


  # Fetches the page of one article (when <tt>:id</tt> is given) or the
  # result page of a title / authors / year search.
  def fetch_article( args={} )
    opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)
    id = opts[:id].to_s.strip

    url =
      if id.empty?
        author = join_article_authors opts[:authors]
        title = opts[:title].kind_of?(String) ? opts[:title] : ''
        title = MathMetadata.normalize_text(title)
        title = nwords(title) if @options[:nwords]
        self.class::ARTICLE_URL % [uri_escape(title), author, opts[:year].to_s]
      else
        self.class::ARTICLE_ID_URL % uri_escape(id)
      end

    fetch_page(url, opts)
  end

end # Site
|
220
|
+
|
221
|
+
end # Module
|