math_metadata_lookup 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +69 -0
- data/Rakefile +37 -0
- data/TODO +15 -0
- data/bin/math_metadata_lookup +134 -0
- data/lib/math_metadata_lookup.rb +28 -0
- data/lib/math_metadata_lookup/article.rb +150 -0
- data/lib/math_metadata_lookup/author.rb +50 -0
- data/lib/math_metadata_lookup/entity.rb +39 -0
- data/lib/math_metadata_lookup/lookup.rb +85 -0
- data/lib/math_metadata_lookup/reference.rb +122 -0
- data/lib/math_metadata_lookup/result.rb +97 -0
- data/lib/math_metadata_lookup/site.rb +221 -0
- data/lib/math_metadata_lookup/sites/mr.rb +67 -0
- data/lib/math_metadata_lookup/sites/zbl.rb +97 -0
- data/lib/math_metadata_lookup/tools.rb +110 -0
- data/math_metadata_lookup.gemspec +29 -0
- metadata +108 -0
data/README.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
About
|
2
|
+
-----
|
3
|
+
|
4
|
+
This utility searches mathematical review sites and fetches metadata about articles.
|
5
|
+
It returns results in one of the following formats: text, xml, html, yaml or ruby.
|
6
|
+
|
7
|
+
|
8
|
+
Command line usage example
|
9
|
+
--------------------------
|
10
|
+
|
11
|
+
To get full help, run it without any arguments:
|
12
|
+
|
13
|
+
math_metadata_lookup
|
14
|
+
|
15
|
+
Fetching metadata about an article:
|
16
|
+
|
17
|
+
math_metadata_lookup article -t "Sobolev embeddings with variable exponent. II"
|
18
|
+
|
19
|
+
Returns list of articles:
|
20
|
+
bin/math_metadata_lookup article -t "Sobolev embeddings" -a "Rakosnik, Jiri" -a "Edmunds, David" -f html
|
21
|
+
|
22
|
+
Searching for authors:
|
23
|
+
bin/math_metadata_lookup author -a "Vesely, Jiri"
|
24
|
+
|
25
|
+
|
26
|
+
Usage from ruby
|
27
|
+
---------------
|
28
|
+
|
29
|
+
require 'rubygems'
|
30
|
+
require 'math_metadata_lookup'
|
31
|
+
|
32
|
+
# initialize the search engine to search only the Mathematical Reviews database
|
33
|
+
l = MathMetadata::Lookup.new :sites => [:mrev]
|
34
|
+
|
35
|
+
article = l.article( :title => "Sobolev embeddings with variable exponent. II" ).first
|
36
|
+
p article[:authors] if article
|
37
|
+
|
38
|
+
|
39
|
+
Resources
|
40
|
+
---------
|
41
|
+
|
42
|
+
Content of the resource directory:
|
43
|
+
|
44
|
+
* **``math_metadata_lookup.js``**: contains the function ``toggle_references( id )``, which toggles the visibility of references in the html document. By default all references are visible. If you set ``display: none`` on the ``references`` class in your css, the references will be hidden by default.
|
45
|
+
|
46
|
+
|
47
|
+
Function reference
|
48
|
+
------------------
|
49
|
+
|
50
|
+
#Lookup#article( hash )
|
51
|
+
|
52
|
+
Hash arguments are:
|
53
|
+
|
54
|
+
* article id is known
|
55
|
+
* **:id**
|
56
|
+
* article id is unknown
|
57
|
+
* **:title** String
|
58
|
+
* **:authors** Array of strings
|
59
|
+
|
60
|
+
Returns instance of class Result.
|
61
|
+
|
62
|
+
|
63
|
+
#Lookup#author( hash )
|
64
|
+
|
65
|
+
Hash arguments are:
|
66
|
+
|
67
|
+
* **:name** String. Author name.
|
68
|
+
|
69
|
+
Returns instance of class Result.
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
#
|
4
|
+
# @author: Petr Kovar <pejuko@gmail.com>
|
5
|
+
# $KCODE is only meaningful on Ruby 1.8; on 1.9+ the assignment merely
# triggers a warning, so guard it the same way the bin script does.
$KCODE = 'UTF8' if RUBY_VERSION < "1.9"

# Newer Rake releases no longer ship rake/gempackagetask; RubyGems provides
# the equivalent Gem::PackageTask. Prefer it and fall back to the legacy task
# so old tool chains keep working.
begin
  require 'rubygems/package_task'
  gem_package_task = Gem::PackageTask
rescue LoadError
  require 'rake/gempackagetask'
  gem_package_task = Rake::GemPackageTask
end
require 'rake/clean'

# Artifacts removed by `rake clean`.
CLEAN << "coverage" << "pkg" << "README.html" << "CHANGELOG.html" << '*.rbc'

task :default => [:doc, :gem]

# The gemspec file is plain Ruby that evaluates to a Gem::Specification.
gem_package_task.new(eval(File.read("math_metadata_lookup.gemspec"))) {|pkg|}
|
15
|
+
|
16
|
+
# Documentation tasks. They need the optional bluecloth gem; when it is not
# installed a stub :doc task is defined instead, so that the default task
# (which depends on :doc) can still run and build the gem.
begin
  require 'bluecloth'

  # Renders a Markdown file to HTML and writes it next to the source.
  #
  # mdfile - path to a file whose name ends in ".md".
  #
  # The output path is the input path with ".md" replaced by ".html".
  # Raises RuntimeError ("Unknown file type") for any other file name.
  def build_document(mdfile)
    fname = $1 if mdfile =~ /(.*)\.md$/
    raise "Unknown file type" unless fname

    html = Markdown.new(File.read(mdfile)).to_html
    File.open("#{fname}.html", "w") { |f| f << html }
  end

  task :doc => [:readme]

  task :readme do |t|
    build_document("README.md")
  end

rescue LoadError
  # bluecloth is missing: keep :doc defined so `rake` does not abort with
  # "Don't know how to build task 'doc'".
  task :doc do
    warn "bluecloth is not installed; skipping documentation build"
  end
end
|
data/TODO
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*-: coding: utf-8 -*-
|
3
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
4
|
+
|
5
|
+
$KCODE="UTF8" if RUBY_VERSION < "1.9"
|
6
|
+
|
7
|
+
$:.unshift File.expand_path(File.join(File.dirname(__FILE__), "../lib"))
|
8
|
+
|
9
|
+
# Prints the command line usage (commands, their options and a few examples)
# to standard output. The program name shown is taken from $0.
def print_help
  puts <<HELP

Searches mathematical review sites and fetches metadata about articles.

#{$0} <command> <options>


Commands: article, author, heuristic, reference

article -- create and run query for each site and return results

--title, -t <title>
--author, -a <author> -- repeatable option
--year, -y <year>

--id, -i <id> -- if id is used, year, title and author are ignored
it is good to use it with -s option

author -- search for authors "name forms"

--author, -a <author> -- only once


heuristic -- returns only one best match from each site where
similarity is higher than the threshold

--title, -t <title>
--author, -a <author> -- repeatable option
--year, -y <year>
--threshold, -d <0.0...1.0> -- default: 0.6


reference -- parse reference string and run heuristic

--reference, -r <string with reference> -- parse the string to get title, authors and year
--threshold, -d <0.0...1.0> -- default: 0.6


common options:
--site, -s <mr,zbl> -- repeatable, sites to search on, default: all
--format, -f <text|html|xml|ruby|yaml> -- output format, default: text
--verbose, -v


Examples:

#{$0} article -t "Sobolev embeddings with variable exponent. II"
#{$0} article -t "Sobolev embeddings" -a "Rakosnik, Jiri" -a "Edmunds, David" -f html
#{$0} author -a "Vesely, Jiri"
#{$0} reference -r "Kufner, A., John, O., and Fučík, S.: Function Spaces, Noordhoff, Leyden, and Academia, Prague, 1977" -d 0.4
HELP
end
|
61
|
+
|
62
|
+
require 'pp'
|
63
|
+
require 'yaml'
|
64
|
+
require 'rubygems'
|
65
|
+
require 'math_metadata_lookup'
|
66
|
+
|
67
|
+
# --- command line entry point -------------------------------------------
# The first argument is the command; everything after it is parsed as options.
$command = ARGV.shift

# Reject a missing or unknown command up front, before any option parsing
# or lookup object is set up.
unless %w[article author heuristic reference].include?($command)
  print_help
  exit 1
end

require 'getoptlong'
opts = GetoptLong.new(
  ["--title", "-t", GetoptLong::REQUIRED_ARGUMENT],
  ["--author", "-a", GetoptLong::REQUIRED_ARGUMENT],
  ["--year", "-y", GetoptLong::REQUIRED_ARGUMENT],
  ["--id", "-i", GetoptLong::REQUIRED_ARGUMENT],
  ["--site", "-s", GetoptLong::REQUIRED_ARGUMENT],
  ["--verbose", "-v", GetoptLong::NO_ARGUMENT],
  ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT],
  ["--threshold", "-d", GetoptLong::REQUIRED_ARGUMENT],
  ["--reference", "-r", GetoptLong::REQUIRED_ARGUMENT]
)

# Defaults; --site and --author are repeatable and accumulate into arrays.
$options = {:sites => [], :authors => [], :format => :text, :verbose => false}
opts.each do |opt, val|
  # "--title" -> :title
  optkey = opt[2..-1].to_sym
  case optkey
  when :title, :id, :year, :reference
    $options[optkey] = val.strip
  when :site, :author
    $options["#{optkey}s".to_sym] << val.strip
  when :format
    $options[optkey] = val.strip.to_sym
  when :threshold
    # Float() rejects non-numeric input, whereas String#to_f would silently
    # turn it into 0.0 and accept every match.
    begin
      $options[optkey] = Float(val)
    rescue ArgumentError
      $stderr.puts "Invalid threshold: #{val.inspect}"
      print_help
      exit 1
    end
  when :verbose
    $options[optkey] = true
  end
end

unless MathMetadata::Result::FORMATS.include?($options[:format].to_sym)
  print_help
  exit 1
end

pp $options if $options[:verbose]

# No --site given means "search everywhere".
sites = $options[:sites].size == 0 ? :all : $options[:sites].map{|s| s.to_sym}
l = MathMetadata::Lookup.new :sites => sites, :verbose => $options[:verbose]

# The lookup is asked for the :ruby format when yaml output is requested;
# the yaml rendering itself happens below via result.format.
args = $options.dup
args[:format] = $options[:format] == :yaml ? :ruby : $options[:format].to_sym
result = case $command
when 'article'
  l.article args
when 'author'
  l.author :name => $options[:authors].first, :format => args[:format]
when 'heuristic'
  l.heuristic args
when 'reference'
  l.reference args
end

case $options[:format].to_sym
when :ruby
  pp result
when :yaml, :html, :xml, :text
  puts result.format($options[:format])
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
# Make the library directory requirable regardless of how this file is loaded.
dir = File.expand_path(File.dirname(__FILE__))
$:.unshift(dir) unless $:.include?(dir)

# The entity classes call ::CGI.escapeHTML; requiring cgi here is harmless if
# another file of the library already loads it.
require 'cgi'

# tools
require 'math_metadata_lookup/tools'

# result class
require 'math_metadata_lookup/result'

# entities
require 'math_metadata_lookup/entity'
require 'math_metadata_lookup/article'
require 'math_metadata_lookup/author'
require 'math_metadata_lookup/reference'

# abstract class for sites
require 'math_metadata_lookup/site'

# load up sites definition
# Dir[] gives no ordering guarantee on older Rubies; sort so the sites are
# always loaded in the same order on every platform.
Dir["#{dir}/math_metadata_lookup/sites/*.rb"].sort.each do |site|
  require site
end

# main class
require 'math_metadata_lookup/lookup'
|
@@ -0,0 +1,150 @@
|
|
1
|
+
module MathMetadata
|
2
|
+
|
3
|
+
class Article < Entity
|
4
|
+
|
5
|
+
# Fuzzy equality: two articles are considered equal when their similarity
# score is greater than 0.9.
#
# article - the object to compare with; it is passed on to #similarity.
#
# Returns true or false. Comparing with nil returns false instead of
# raising NoMethodError from inside #similarity.
def ==(article)
  return false if article.nil?

  similarity(article) > 0.9
end
|
8
|
+
|
9
|
+
|
10
|
+
# Computes a weighted similarity score between this article and +article+.
#
# Title, authors and year are each compared with
# MathMetadata.levenshtein_distance and combined as a weighted average
# (title 2.8, authors 1.4, year 1.0). A field only takes part when this
# article has a non-empty value for it. Authors are sorted before the
# comparison so their order does not matter.
#
# NOTE(review): the helper presumably returns a normalized score where a
# higher value means "more similar" (#== treats > 0.9 as equal) -- confirm
# against its definition.
#
# article - object answering [] for :title, :authors and :year.
#
# Returns a Float; 0.0 when this article has none of the three fields
# (previously this case produced NaN from 0.0 / 0.0).
def similarity(article)
  own_authors = [@metadata[:authors]].flatten
  weighted = []

  unless @metadata[:title].to_s.empty?
    title_score = MathMetadata.levenshtein_distance(@metadata[:title].to_s, article[:title].to_s)
    weighted << [title_score, 2.8]
  end

  unless own_authors.join(";").empty?
    other_authors = [article[:authors]].flatten
    authors_score = MathMetadata.levenshtein_distance(own_authors.sort.join(";"), other_authors.sort.join(";"))
    weighted << [authors_score, 1.4]
  end

  unless @metadata[:year].to_s.empty?
    year_score = MathMetadata.levenshtein_distance(@metadata[:year].to_s, article[:year].to_s)
    weighted << [year_score, 1.0]
  end

  total_weight = weighted.inject(0.0) { |sum, pair| sum + pair[1] }
  # Nothing to compare: avoid 0.0 / 0.0 => NaN.
  return 0.0 if total_weight.zero?

  weighted.inject(0.0) { |sum, pair| sum + pair[0] * pair[1] } / total_weight
end
|
28
|
+
|
29
|
+
|
30
|
+
# Renders the article as plain text: one "Label: value" line per field,
# followed by one "Ref.:" line per reference and a terminating blank line.
#
# List-like fields (authors, MSC, ISSN, keywords) may be nil, a single value
# or an array; they are joined with "; ". ISSN and keywords used to call
# #join directly and raised NoMethodError when the field was missing.
#
# Returns a String.
def to_text
  joined = lambda { |value| [value].flatten.join("; ") }

  fields = [
    ["Id",          @metadata[:id]],
    ["Similarity",  @metadata[:similarity]],
    ["Publication", @metadata[:publication]],
    ["Title",       @metadata[:title]],
    ["Authors",     joined.call(@metadata[:authors])],
    ["Year",        @metadata[:year]],
    ["Language",    @metadata[:language]],
    ["MSC",         joined.call(@metadata[:msc])],
    ["Pages",       @metadata[:range]],
    ["ISSN",        joined.call(@metadata[:issn])],
    ["Keywords",    joined.call(@metadata[:keywords])]
  ]
  result = fields.map { |label, value| "#{label}: #{value}" }.join("\n")

  # Each reference wraps an article-like object; only authors and title are shown.
  @metadata[:references].to_a.each_with_index do |ref, idx|
    a = ref.article
    result << "\nRef.: #{idx+1}. #{joined.call(a[:authors])}: #{a[:title]}"
  end

  result << "\n\n"
  result
end
|
51
|
+
|
52
|
+
|
53
|
+
# Renders the article as an <article> XML fragment, including its authors,
# MSC classes, pages, ISSNs, keywords and references.
#
# Every value is converted to a String and escaped with CGI.escapeHTML, so
# nil or numeric metadata no longer raises. List-like fields may be nil, a
# single value or an array. A reference is numbered with ref[:number] when
# present and otherwise with its 1-based position (the previous
# `ref[:number].to_s || ...` never reached the fallback because #to_s never
# returns nil).
#
# Returns a String.
def to_xml
  esc  = lambda { |value| ::CGI.escapeHTML(value.to_s) }
  list = lambda { |value| [value].flatten.compact }

  result = %~\n<article id="#{esc.call(@metadata[:id])}" year="#{esc.call(@metadata[:year])}" lang="#{esc.call(@metadata[:language])}">~
  result << %~\n<publication>#{esc.call(@metadata[:publication])}</publication>~
  result << %~\n<title>#{esc.call(@metadata[:title])}</title>~

  result << %~\n<authors>~
  list.call(@metadata[:authors]).each do |author|
    result << %~\n<author>#{esc.call(author)}</author>~
  end
  result << %~\n</authors>~

  result << %~\n<msc>~
  list.call(@metadata[:msc]).each do |msc|
    result << %~\n<class>#{esc.call(msc)}</class>~
  end
  result << %~\n</msc>~

  result << %~\n<pages>#{esc.call(@metadata[:range])}</pages>~
  list.call(@metadata[:issn]).each do |issn|
    result << %~\n<issn>#{esc.call(issn)}</issn>~
  end
  list.call(@metadata[:keywords]).each do |keyword|
    result << %~\n<keyword>#{esc.call(keyword)}</keyword>~
  end

  result << %~\n<references>\n~
  @metadata[:references].to_a.each_with_index do |reference, idx|
    ref = reference.article
    number = ref[:number] || (idx + 1)

    result << %~\n<reference id="#{esc.call(ref[:id])}" number="#{esc.call(number)}">~
    result << %~\n<source>#{esc.call(reference.source)}</source>~
    result << %~\n<authors>~
    list.call(ref[:authors]).each do |author|
      result << %~\n<author>#{esc.call(author)}</author>~
    end
    result << %~\n</authors>~
    result << %~\n<title>#{esc.call(ref[:title])}</title>~
    result << %~\n<publication>#{esc.call(ref[:publication])}</publication>~
    result << %~\n<publisher>#{esc.call(ref[:publisher])}</publisher>~
    result << %~\n<year>#{esc.call(ref[:year])}</year>~
    result << %~\n</reference>\n~
  end
  result << %~\n</references>\n</article>\n~

  result
end
|
109
|
+
|
110
|
+
|
111
|
+
# Renders the article as an HTML <div class="article"> fragment with a
# collapsible list of references (toggled by the toggle_references
# JavaScript function).
#
# All metadata, including the reference fields and the id used in the
# link and element attributes, is escaped with CGI.escapeHTML; the
# reference fields were previously emitted raw. List-like fields may be nil,
# a single value or an array. The references <div> now has the missing space
# before its class attribute and the link text uses &gt; entities.
#
# Returns a String.
def to_html
  esc    = lambda { |value| ::CGI.escapeHTML(value.to_s) }
  joined = lambda { |value| [value].flatten.compact.join("; ") }
  ref_id = "ref#{esc.call(@metadata[:id])}"

  result = %~\n<div class="article">~
  result << %~\nId: <span class="id">#{esc.call(@metadata[:id])}</span><br />~
  result << %~\nPublication: <span class="publication">#{esc.call(@metadata[:publication])}</span><br />~
  result << %~\nTitle: <span class="title">#{esc.call(@metadata[:title])}</span><br />~
  result << %~\nAuthors: <span class="authors">#{esc.call(joined.call(@metadata[:authors]))}</span><br />~
  result << %~\nYear: <span class="year">#{esc.call(@metadata[:year])}</span><br />~
  result << %~\nLanguage: <span class="lang">#{esc.call(@metadata[:language])}</span><br />~
  result << %~\nMSC: <span class="msc">#{esc.call(joined.call(@metadata[:msc]))}</span><br />~
  result << %~\nPages: <span class="pages">#{esc.call(@metadata[:range])}</span><br />~
  result << %~\nISSN: <span class="issn">#{esc.call(joined.call(@metadata[:issn]))}</span><br />~
  result << %~\nKeywords: <span class="keywords">#{esc.call(joined.call(@metadata[:keywords]))}</span><br />~
  result << %~\n<a href="javascript:toggle_references('#{ref_id}')">References &gt;&gt;&gt;</a>~
  result << %~\n<div id="#{ref_id}" name="#{ref_id}" class="references">\n~

  @metadata[:references].to_a.each_with_index do |reference, idx|
    ref = reference.article

    result << %~\n<div class="reference">~
    result << %~\nSource: #{esc.call(reference.source)}~
    result << %~\nAuthors: #{esc.call(joined.call(ref[:authors]))}~
    result << %~\nTitle: #{esc.call(ref[:title])}~
    result << %~\nPublication: #{esc.call(ref[:publication])}~
    result << %~\nPublisher: #{esc.call(ref[:publisher])}~
    result << %~\nYear: #{esc.call(ref[:year])}~
    result << %~\nId: #{esc.call(ref[:id])}~
    result << %~\n</div>\n~
  end

  result << %~\n</div>\n</div>\n~
  result
end
|
147
|
+
|
148
|
+
end # class
|
149
|
+
|
150
|
+
end # module
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module MathMetadata
|
2
|
+
|
3
|
+
class Author < Entity
|
4
|
+
|
5
|
+
# Renders the author as plain text: the id, the preferred name form and one
# "Other:" line per additional name form, followed by a blank line.
#
# :forms may be nil, a single value or an array (a single String used to
# raise because String has no #to_a on Ruby 1.9+).
#
# Returns a String.
def to_text
  result = "Id: #{@metadata[:id]}\nPreferred: #{@metadata[:preferred]}"
  [@metadata[:forms]].flatten.compact.each do |form|
    result << "\nOther: #{form}"
  end
  result << "\n\n"
  result
end
|
14
|
+
|
15
|
+
|
16
|
+
# Renders the author as an <author> XML fragment with one <name> element for
# the preferred form and one for every other known form.
#
# Values are converted to Strings before escaping and :forms may be nil, a
# single value or an array, so incomplete metadata no longer raises.
#
# Returns a String.
def to_xml
  esc = lambda { |value| ::CGI.escapeHTML(value.to_s) }

  result = %~\n<author id="#{esc.call(@metadata[:id])}">~
  result << %~\n<name form="preferred">#{esc.call(@metadata[:preferred])}</name>~
  [@metadata[:forms]].flatten.compact.each do |form|
    result << %~\n<name form="other">#{esc.call(form)}</name>~
  end
  result << %~\n</author>\n~
  result
end
|
29
|
+
|
30
|
+
|
31
|
+
# Renders the author as an HTML <div class="author"> fragment containing the
# id, the preferred name form and every other known form.
#
# Values are converted to Strings before escaping and :forms may be nil, a
# single value or an array, so incomplete metadata no longer raises.
#
# Returns a String.
def to_html
  esc = lambda { |value| ::CGI.escapeHTML(value.to_s) }

  result = %~\n<div class="author">~
  result << %~\n<div class="author_id">Id: #{esc.call(@metadata[:id])}</div>~
  result << %~\n<div class="preferred">Preferred: #{esc.call(@metadata[:preferred])}</div>~
  [@metadata[:forms]].flatten.compact.each do |form|
    result << %~\n<div class="other">Other: #{esc.call(form)}</div>~
  end
  result << %~\n</div>\n~
  result
end
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|