math_metadata_lookup 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +69 -0
- data/Rakefile +37 -0
- data/TODO +15 -0
- data/bin/math_metadata_lookup +134 -0
- data/lib/math_metadata_lookup.rb +28 -0
- data/lib/math_metadata_lookup/article.rb +150 -0
- data/lib/math_metadata_lookup/author.rb +50 -0
- data/lib/math_metadata_lookup/entity.rb +39 -0
- data/lib/math_metadata_lookup/lookup.rb +85 -0
- data/lib/math_metadata_lookup/reference.rb +122 -0
- data/lib/math_metadata_lookup/result.rb +97 -0
- data/lib/math_metadata_lookup/site.rb +221 -0
- data/lib/math_metadata_lookup/sites/mr.rb +67 -0
- data/lib/math_metadata_lookup/sites/zbl.rb +97 -0
- data/lib/math_metadata_lookup/tools.rb +110 -0
- data/math_metadata_lookup.gemspec +29 -0
- metadata +108 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
module MathMetadata
|
2
|
+
|
3
|
+
# Generic metadata record backed by a Hash with symbol keys.
#
# Values can be read and written with the index operators
# (<tt>entity[:title]</tt>) or through dynamic accessors handled by
# #method_missing (<tt>entity.title</tt>, <tt>entity.title = "x"</tt>).
class Entity

  # meta:: initial metadata (Hash or nil). The hash is copied, never
  #        modified. Keys are converted to symbols so that they agree with
  #        #[] and #[]=, which always look keys up as symbols.
  def initialize( meta={} )
    @metadata = {}
    (meta || {}).each do |key, value|
      key = key.to_sym if key.respond_to?(:to_sym)
      @metadata[key] = value
    end
  end

  # Dynamic accessors: <tt>name=</tt> stores its first argument under
  # +name+; every other unknown message is treated as a read of the key
  # with that name and returns nil when the key is absent.
  def method_missing( meth, *args )
    # Anchored on purpose: only identifier-like setters count, so operator
    # names such as <= are not mistaken for assignments.
    if meth.to_s =~ /\A(\w+)=\z/
      self[$1] = args.first
    else
      self[meth]
    end
  end

  # Reports dynamic setters and keys that are currently stored, so that
  # respond_to? agrees with what #method_missing can meaningfully answer.
  def respond_to_missing?( meth, include_private=false )
    return true if meth.to_s =~ /\A\w+=\z/
    @metadata.key?(meth.to_sym) || super
  end

  # Returns the value stored under +key+ (String or Symbol), or nil.
  def [](key)
    @metadata[key.to_sym]
  end

  # Stores +value+ under +key+ (String or Symbol).
  def []=(key, value)
    @metadata[key.to_sym] = value
  end

  # Renders the entity in the requested format.
  #
  # f:: :text, :html or :xml delegate to the matching +to_<format>+
  #     method; any other value (including the default :ruby) returns the
  #     entity itself.
  def format( f=:ruby )
    case f.to_sym
    when :text, :html, :xml
      send("to_#{f}")
    else
      self
    end
  end

end
|
38
|
+
|
39
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Main class for searching through all sites
|
7
|
+
# Front end that runs one query against every registered site class
# (the entries of SITES) and collects the answers in a Result.
class Lookup
  attr_accessor :options

  # opts:: :sites   - :all, or a site ID / array of site IDs to restrict to
  #        :verbose - passed through to every site instance
  def initialize( opts={} )
    @options = { :sites => :all, :verbose => true }.merge(opts)
    @sites = []
  end

  # Forwards any unknown message to a fresh instance of every enabled
  # site and wraps the answers in a Result. Each entry holds the site's
  # ID, NAME and URL constants plus the value returned by the site.
  def method_missing(meth, *args)
    # The first argument is an options hash for the built-in queries, but
    # it may be missing or of another type for other forwarded calls.
    query = args.first.kind_of?(Hash) ? args.first : {}

    entries = enabled_sites.map do |klass|
      site = klass.new(:verbose => @options[:verbose], :nwords => query[:nwords])

      entry = {:site => klass::ID, :name => klass::NAME, :url => klass::URL}
      entry[:result] = site.send(meth, *args)
      entry
    end

    Result.new(entries)
  end


  # Tries to decide what the best result for a query is.
  #
  # The sites are searched with the authors reduced to one name token and
  # the title limited to two words (:nwords => 2). Every returned article
  # is then scored against the full query with Article#similarity;
  # articles scoring below :threshold (default 0.6) are dropped and only
  # the best one per site is kept.
  #
  # args:: :title, :authors (array of names), :year, :threshold
  # Returns the Result of the search with each site's :result narrowed.
  def heuristic( args={} )
    opts = {:threshold => 0.6}.merge(args)

    # Build a new surname list instead of rewriting args[:authors] in
    # place: the query article below has to keep the full names, and the
    # caller's array must stay untouched.
    full_names = Array(args[:authors])
    surnames = full_names.map { |name| surname_of(name) }
    sites = article(args.merge(:authors => surnames, :nwords => 2))

    # query article has to contain full names
    query_article = Article.new( {:title => args[:title].to_s, :authors => full_names, :year => args[:year]} )
    sites.each do |site|
      candidates = site[:result].to_a
      candidates.each do |found|
        next if found[:title].to_s.empty?
        found[:similarity] = query_article.similarity(found)
      end
      candidates.delete_if { |a| a[:similarity].to_f < opts[:threshold].to_f }
      next if candidates.empty?

      site[:result] = [candidates.max_by { |a| a[:similarity].to_f }]
    end

    sites
  end


  # Parses a reference string and runs #heuristic on the parsed title,
  # authors and year.
  #
  # args:: :reference (String), :verbose (pretty-print the parsed
  #        reference), plus any option understood by #heuristic.
  # Returns an empty Result when the reference yields no article (for
  # example an empty string), since there is nothing to search for.
  def reference( args={} )
    ref = Reference.new args[:reference]
    pp ref if args[:verbose]

    parsed = ref.article
    return Result.new unless parsed

    opts = {:threshold => 0.6}.merge(args)
    opts[:title] = parsed[:title]
    opts[:authors] = parsed[:authors]
    opts[:year] = parsed[:year]

    heuristic opts
  end

  private

  # Site classes selected by the :sites option (all of SITES for :all).
  def enabled_sites
    return SITES.dup if @options[:sites] == :all

    allowed = [@options[:sites]].flatten
    SITES.select { |klass| allowed.include?(klass::ID) }
  end

  # Reduces an author name to a single token: the text before the first
  # comma and, when that still contains "word word", its first word.
  # Non-string values are returned unchanged.
  def surname_of( name )
    return name unless name.kind_of?(String)

    head = name[/[^,]+/] || name
    head[/([^ ]+) \S+/, 1] || head
  end

end # Lookup
|
84
|
+
|
85
|
+
end # module
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# One bibliographic reference: either wraps an existing Article or parses
# a free-text citation string into one.
class Reference

  # Citation patterns, tried in numeric order by Reference.parse. The
  # comment above each pattern names its capture groups.

  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_1_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*\((\d{4})\)\s*,\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publication, 4=range, 5=publisher, 6=place, 7=year
  ARTICLE_REFERENCE_2_RE = %r{([^:]+):\s*(.*?),\s*(.*?,\s*[^,]+,\s*[^,]+,\s*[^,]+),\s*pp\.\s*([^,]+?),\s*([^,]+),\s*(.*?),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=range, 4=publication, 5=place, 6=year
  ARTICLE_REFERENCE_3_RE = %r{([^:]+):\s*(.*?),\s*pp\.\s*([^,]+?),\s*([^,]+),\s*(.*?),\s*(\d{4})}mi
  # 1=authors, 2=title, 3=publication, 4=publisher, 5=place, 6=year
  ARTICLE_REFERENCE_4_RE = %r{([^:]+):\s*(.*?),\s*(.*?),\s*([^,]+),\s*([^,]+),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place, 5=year
  ARTICLE_REFERENCE_5_RE = %r{([^:]+):\s*(.*?),\s*(.*?),\s*([^,]+),\s*(\d{4})\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place, 5=year
  ARTICLE_REFERENCE_6_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*([^,]+),\s*(\d{4})\s*}mi
  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_7_RE = %r{([^:]+):\s*(.*),\s*(.*?,\s*\d+)\s*\((\d{4})\),\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publication, 4=year, 5=range
  ARTICLE_REFERENCE_8_RE = %r{([^:]+):\s*(.*),\s*(.*?)\s*\((\d{4})\),\s*([^ ]+)\s*.*?}mi
  # 1=authors, 2=title, 3=publisher, 4=place
  ARTICLE_REFERENCE_9_RE = %r{([^:]+):\s*(.*?),\s*([^,]+),\s*(.*)}mi
  # 1=authors, 2=title, 3=publication
  ARTICLE_REFERENCE_10_RE = %r{([^:]+):\s*(.*?),\s*(.*?)\s*.*?}mi
  # 1=authors, 2=title, 3=place, 4=year
  ARTICLE_REFERENCE_11_RE = %r{([^:]+):\s*(.*),(.*?)\s+(\d{4})}mi

  # Each pattern paired with the article field that every capture group
  # fills, in capture order. Position in this list + 1 is the pattern
  # number reported by Reference.parse.
  ARTICLE_REFERENCE_PATTERNS = [
    [ARTICLE_REFERENCE_1_RE,  [:authors, :title, :publication, :year, :range]],
    [ARTICLE_REFERENCE_2_RE,  [:authors, :title, :publication, :range, :publisher, :place, :year]],
    [ARTICLE_REFERENCE_3_RE,  [:authors, :title, :range, :publication, :place, :year]],
    [ARTICLE_REFERENCE_4_RE,  [:authors, :title, :publication, :publisher, :place, :year]],
    [ARTICLE_REFERENCE_5_RE,  [:authors, :title, :publisher, :place, :year]],
    [ARTICLE_REFERENCE_6_RE,  [:authors, :title, :publisher, :place, :year]],
    [ARTICLE_REFERENCE_7_RE,  [:authors, :title, :publication, :year, :range]],
    [ARTICLE_REFERENCE_8_RE,  [:authors, :title, :publication, :year, :range]],
    [ARTICLE_REFERENCE_9_RE,  [:authors, :title, :publisher, :place]],
    [ARTICLE_REFERENCE_10_RE, [:authors, :title, :publication]],
    [ARTICLE_REFERENCE_11_RE, [:authors, :title, :place, :year]]
  ].freeze

  # Article keys written by Reference.parse (absent fields are set to nil).
  ARTICLE_KEYS = [:authors, :title, :publication, :year, :range, :id, :place, :publisher].freeze

  # NOTE(review): :reg is never assigned in this class; it looks like it
  # was meant to receive the pattern number returned by Reference.parse -
  # confirm before relying on it.
  attr_accessor :source, :article, :suffix, :number, :reg

  # str:: an Article (used as is) or a citation string to parse. For an
  #       empty or nil string no article is created.
  # i::   ordinal number of the reference.
  def initialize( str=nil, i=1 )
    @number = i
    if str.kind_of?(Article)
      @source = @suffix = nil
      @article = str
    else
      @source = str
      @article, @suffix = Reference.parse(str) unless str.to_s.empty?
    end
  end


  # Parses a citation string with the first matching pattern.
  #
  # Returns [article, suffix, pattern_number]: +article+ is a new Article
  # with the keys of ARTICLE_KEYS filled from the captures (authors split
  # into an array, the range passed through MathMetadata.normalize_range),
  # +suffix+ is always nil here, and +pattern_number+ is 1..11, or 0 when
  # no pattern matched (all fields nil, authors empty).
  def self.parse( str )
    article = Article.new
    rnumber = 0
    suffix = nil
    found = {}

    ARTICLE_REFERENCE_PATTERNS.each_with_index do |(re, fields), idx|
      match = re.match(str.to_s)
      next unless match

      fields.each_with_index { |field, i| found[field] = match[i + 1] }
      found[:range] = MathMetadata.normalize_range(found[:range]) if fields.include?(:range)
      rnumber = idx + 1
      break
    end

    ARTICLE_KEYS.each { |key| article[key] = found[key] }
    article.authors = Reference.split_authors article.authors

    [article, suffix, rnumber]
  end


  # Splits an author list into individual names. The string is split on
  # semicolons, then on "and"/"und"/"et", then on "Surname, Name" groups;
  # blank pieces are dropped. Returns an array of strings.
  def self.split_authors( str )
    separators = [
      /;\s*/,
      /,?\s*(?:and|und|et)\s+/,
      /(\S+,\s*[^,]+),?\s*/
    ]

    authors = separators.inject([str]) do |parts, re|
      parts.map { |part| part.to_s.split(re) }.flatten
    end
    authors.delete_if { |a| a.strip.empty? }

    authors
  end

end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Enumerable collection of per-site search results. Every element is a
# Hash; the methods here read its :name, :url and :result keys, where
# :result is a list of entities (or nil when a site found nothing).
class Result
  include Enumerable

  FORMATS = [:ruby, :yaml, :xml, :html, :text]

  # meta:: array of per-site hashes; stored as is (not copied).
  def initialize( meta=[] )
    @metadata = meta
  end


  # Appends one per-site hash.
  def <<(val)
    @metadata << val
  end


  # Yields every per-site hash; returns an Enumerator without a block.
  def each
    return enum_for(:each) unless block_given?

    @metadata.each do |site|
      yield site
    end
  end


  # Renders the result in format +f+ (see FORMATS). :ruby, the default,
  # returns the Result itself - there is no text form for it, so it must
  # not be dispatched to a +to_ruby+ method. Every other format is
  # dispatched to the matching +to_<format>+ method.
  def format( f=:ruby )
    return self if f.to_s == "ruby"

    self.send "to_#{f}"
  end


  # HTML fragment: one <div class="site"> per site that has results,
  # filled with each entity's +to_html+ output.
  def to_html
    result = ""
    @metadata.each do |site|
      # Skip sites without results, as #to_text does.
      next unless site[:result]
      result << "\n<div class=\"site\">\n<h3>Site: #{site[:name]}</h3>"
      site[:result].each do |entity|
        result << entity.to_html
      end
      result << "</div>"
    end
    result
  end


  # XML document: one <site> element per site that has results, filled
  # with each entity's +to_xml+ output.
  def to_xml
    result = ""

    result << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<mml>"
    @metadata.each do |site|
      # Skip sites without results, as #to_text does.
      next unless site[:result]
      result << "\n<site name=\"#{site[:name]}\">"
      site[:result].each do |entity|
        result << entity.to_xml
      end
      result << "\n</site>\n"
    end
    result << "</mml>"

    result
  end


  # YAML dump of the underlying array.
  def to_yaml
    @metadata.to_yaml
  end


  # The underlying array of per-site hashes (not a copy).
  def to_array
    @metadata
  end


  # Plain-text listing: site name and URL followed by each entity's
  # +to_text+ output. Sites without results are skipped.
  def to_text
    result = ""
    @metadata.each do |site|
      next unless site[:result]
      result << "Site: #{site[:name]}\n"
      result << "URL: #{site[:url]}\n"
      result << "\n"
      site[:result].each do |entity|
        result << entity.to_text
      end
      result << "\n"
    end
    result
  end
  alias :to_str :to_text

end
|
96
|
+
|
97
|
+
end # MathMetadata
|
@@ -0,0 +1,221 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
require 'htmlentities'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'i18n'
|
7
|
+
require 'cgi'
|
8
|
+
|
9
|
+
|
10
|
+
module MathMetadata
|
11
|
+
|
12
|
+
# Registry of site classes; Site.inherited appends every Site subclass.
SITES = []
|
13
|
+
|
14
|
+
# Abstract class. Inherit in your sites definition.
|
15
|
+
# Abstract class. Inherit in your sites definition.
#
# A subclass supplies constants: URL templates (AUTHOR_URL, ARTICLE_URL,
# ARTICLE_ID_URL) and regular expressions named *_RE. The regular
# expressions are looked up by name from the dynamic +list_of_*?+ and
# +get_*+ helpers implemented in #method_missing.
class Site

  # opts:: :verbose - print every fetched URL (default true)
  #        :nwords  - when set, search titles are cut to this many words
  def initialize( opts={} )
    @options = { :verbose => true }.merge(opts)
  end

  # register new site class
  def self.inherited( site )
    super
    SITES << site
  end


  # Searches for authors.
  #
  # args:: :name - author name to look up
  # Returns an array of Author records (:id, :preferred, :forms); records
  # with a blank id are left out.
  def author( args={} )
    opts = {:name => nil}.merge(args)

    found = author_name_forms(opts[:name]).map do |af|
      Author.new({:id => af[1], :preferred => af[0], :forms => af[2]})
    end

    found.reject { |entry| entry[:id].to_s.strip.empty? }
  end


  # Searches for articles.
  #
  # args:: :id, or :title / :authors / :year to search by;
  #        :references - also parse the article's references (default true)
  # Returns an array of Article records, or nil when the page could not
  # be fetched or no article with a title was found.
  def article( args={} )
    opts = {:id => nil, :title => "", :year => "", :authors => [], :references => true}.merge(args)

    page = fetch_article(opts)
    return nil unless page

    articles = []
    if list_of_articles?(page)
      articles = get_article_list(page)
    else
      a = get_article(page, opts)
      articles << a unless a[:title].to_s.strip.empty?
    end

    articles.empty? ? nil : articles
  end


  protected


  # Dynamic page parsers. +page+ is always the first argument.
  #
  # list_of_X?(page)::    index of the match of LIST_OF_X_RE in page, or nil
  # get_X_m(page, m, n):: for every match of XS_RE: the first m captures
  #                       (stripped) followed by an array holding the first
  #                       n captures of each X_RE match inside capture m+1
  # get_X_s(page)::       first capture of XS_RE scanned with X_RE; returns
  #                       the stripped first capture of every match
  # get_X(page, count)::  stripped captures of X_RE; only the first one
  #                       unless count > 1
  #
  # Any other unknown message returns nil.
  def method_missing(meth, *args)
    page = args.first

    case meth.to_s
    when /\Alist_of_(.*)\?\z/
      page =~ site_regexp("LIST_OF_#{$1}")

    when /\Aget_(.*)_m\z/
      what = $1
      re = site_regexp(what)
      re_s = site_regexp("#{what}S")
      m, n = args[1,2]
      m ||= 1
      n ||= 1
      page.scan(re_s).map do |match|
        entry = (0...m).map { |i| match[i].to_s.strip }
        forms = []
        if match[m]
          match[m].scan(re) do |form|
            n.times { |i| forms << form[i] }
          end
        end
        entry << forms
      end

    when /\Aget_(.*)_s\z/
      what = $1
      re = site_regexp(what)
      # Use the MatchData directly: after a failed or skipped match $1
      # would still hold the capture from the method-name match above.
      found = site_regexp("#{what}S").match(page)
      entries = found && found[1]
      entries.to_s.strip.scan(re).map { |match| match[0].to_s.strip }

    when /\Aget_(.*)\z/
      captures = site_regexp($1).match(page).to_a.map { |x| x.to_s.strip }
      captures.shift
      args[1].to_i <= 1 ? captures.first : captures
    end
  end


  # Looks up the subclass constant "<NAME>_RE" for a lower-case name.
  def site_regexp( name )
    self.class.const_get("#{name.upcase}_RE")
  end


  # Percent-escapes +str+ for use inside a URL. URI.escape no longer
  # exists in Ruby 3, so the default parser's escape is preferred and
  # URI.escape is only a fallback for rubies without URI::DEFAULT_PARSER.
  def escape_uri( str )
    if defined?(URI::DEFAULT_PARSER)
      URI::DEFAULT_PARSER.escape(str)
    else
      URI.escape(str)
    end
  end


  # search for author name forms
  #
  # Returns one [capture 1, capture 2, [forms]] entry per AUTHORS_RE
  # match on the fetched author page.
  def author_name_forms( name )
    page = fetch_author name
    get_author_m page, 2, 1
  end


  # Returns the references found on an article page as Reference objects,
  # numbered from 1, with tags removed and runs of spaces collapsed.
  def get_article_references( page )
    references = []

    get_article_reference_s(page).each_with_index do |raw, idx|
      text = raw.gsub(/<.*?>/,'').gsub(/ +/,' ').strip
      references << Reference.new(text, idx + 1)
    end

    references
  end


  # MSC codes of the article, passed through MathMetadata.normalize_mscs.
  def get_article_msc( page )
    MathMetadata.normalize_mscs(get_article_msc_s(page))
  end


  # Builds an Article from a single-article page.
  #
  # opts:: :references - when true, the references are parsed as well
  def get_article( page, opts={} )
    a = Article.new( {
      :id => get_article_id(page),
      :authors => get_article_author_s(page),
      :msc => get_article_msc(page),
      :publication => get_article_publication(page),
      :range => MathMetadata.normalize_range(get_article_range(page)),
      :year => get_article_year(page),
      :keywords => get_article_keyword_s(page),
      :issn => get_article_issn_s(page)
    } )

    # ARTICLE_TITLE_RE supplies two captures: title and language.
    a.title, a.language = get_article_title(page, 2)
    a.title = a.title.to_s.gsub(/<\/span>/,'')
    a.references = get_article_references(page) if opts[:references]

    a
  end


  # Fetches every article listed on a result-list page by its id (first
  # capture of ARTICLE_ENTRY_RE). Entries that cannot be fetched or have
  # no title are skipped.
  def get_article_list( page )
    articles = []
    page.scan(self.class::ARTICLE_ENTRY_RE).each do |match|
      # #article returns nil when nothing was found for this id.
      a = Array(article(:id => match[0])).first
      next unless a
      articles << a unless a[:title].to_s.strip.empty?
    end
    articles
  end


  # First @options[:nwords] space-separated words of +s+.
  def nwords(s)
    s.split(" ")[0...@options[:nwords].to_i].join(" ")
  end


  # Downloads +url+ and returns its body.
  #
  # args:: :entities - decode HTML entities in the body (default true)
  def fetch_page( url, args={} )
    opts = {:entities => true}.merge(args)

    puts "fetching #{url}" if @options[:verbose]
    page = URI.parse(url).read
    page = HTMLEntities.decode_entities(page) if page and opts[:entities]

    page
  end


  # Fetches the author search page for +name+.
  def fetch_author( name )
    nn = MathMetadata.normalize_name(name)
    url = self.class::AUTHOR_URL % escape_uri(nn)

    fetch_page(url)
  end


  # Normalized, URL-escaped author names joined with "; ".
  def join_article_authors( authors )
    Array(authors).map { |author| escape_uri MathMetadata.normalize_name(author) }.join('; ')
  end


  # Fetches the page of the article with :id, or - when no id is given -
  # the search result page for :title, :authors and :year.
  def fetch_article( args={} )
    opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)

    id = opts[:id].to_s.strip
    if id.empty?
      author = join_article_authors opts[:authors]
      title = opts[:title]
      title = '' if not title.kind_of?(String)
      title = MathMetadata.normalize_text(title)
      title = nwords(title) if @options[:nwords]
      url = self.class::ARTICLE_URL % [escape_uri(title), author, opts[:year].to_s]
    else
      url = self.class::ARTICLE_ID_URL % escape_uri(id)
    end

    fetch_page(url, opts)
  end

end # Site
|
220
|
+
|
221
|
+
end # Module
|