math_metadata_lookup 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +9 -8
- data/Rakefile +2 -0
- data/bin/math_metadata_lookup +1 -1
- data/lib/math_metadata_lookup/result.rb +15 -0
- data/lib/math_metadata_lookup/site.rb +6 -1
- data/lib/math_metadata_lookup/sites/bas-bg.rb +58 -0
- data/lib/math_metadata_lookup/sites/cedram.rb +1 -2
- data/lib/math_metadata_lookup/tools.rb +1 -1
- data/math_metadata_lookup.gemspec +2 -1
- metadata +68 -79
data/README.md
CHANGED
@@ -5,14 +5,15 @@ This utility search mathematical reviews sites and fetches metadata about articl
|
|
5
5
|
It returns results as one of text, xml, html, yaml, json or ruby formats.
|
6
6
|
It can work with LaTeX accent notation.
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
* MathSciNet
|
12
|
-
* Zentralblatt
|
13
|
-
* Numdam
|
14
|
-
* Cedram
|
15
|
-
* DmlCZ
|
8
|
+
Supported sites
|
9
|
+
===============
|
10
|
+
|
11
|
+
* MathSciNet (mr)
|
12
|
+
* Zentralblatt (zbl)
|
13
|
+
* Numdam (numdam)
|
14
|
+
* Cedram (cedram)
|
15
|
+
* DmlCZ (dmlcz)
|
16
|
+
* Bulgarian DML (basbg)
|
16
17
|
|
17
18
|
|
18
19
|
Installation
|
data/Rakefile
CHANGED
@@ -6,6 +6,7 @@ $KCODE='UTF8' if RUBY_VERSION < "1.9"
|
|
6
6
|
|
7
7
|
require 'rake/gempackagetask'
|
8
8
|
require 'rake/clean'
|
9
|
+
require 'rspec/core/rake_task'
|
9
10
|
|
10
11
|
CLEAN << "coverage" << "pkg" << "README.html" << "CHANGELOG.html" << '*.rbc' << "html/" << "yardoc/"
|
11
12
|
|
@@ -17,6 +18,7 @@ task :gem do |t|
|
|
17
18
|
builder.build
|
18
19
|
end
|
19
20
|
|
21
|
+
RSpec::Core::RakeTask.new(:spec)
|
20
22
|
|
21
23
|
docs = []
|
22
24
|
|
data/bin/math_metadata_lookup
CHANGED
@@ -46,7 +46,7 @@ def print_help
|
|
46
46
|
|
47
47
|
|
48
48
|
common options:
|
49
|
-
--site, -s <mr,zbl,dmlcz,cedram,numdam>
|
49
|
+
--site, -s <mr,zbl,dmlcz,cedram,numdam,basbg> -- repeatable, sites to search on, default: all
|
50
50
|
--format, -f <text|html|xml|ruby|yaml|json> -- output format, default: text
|
51
51
|
--verbose, -v
|
52
52
|
|
data/lib/math_metadata_lookup/result.rb
CHANGED
@@ -51,6 +51,21 @@ module MathMetadata
|
|
51
51
|
end
|
52
52
|
|
53
53
|
|
54
|
+
def size
|
55
|
+
@metadata.size
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
def num_results
|
60
|
+
@metadata.inject(0){|s,m| s += m[:result].size}
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
def results
|
65
|
+
@metadata.inject([]){|s,m| s << m[:result]}.flatten
|
66
|
+
end
|
67
|
+
|
68
|
+
|
54
69
|
def format( f=:ruby )
|
55
70
|
self.send "to_#{f}"
|
56
71
|
end
|
data/lib/math_metadata_lookup/site.rb
CHANGED
@@ -209,6 +209,7 @@ module MathMetadata
|
|
209
209
|
page = coder.decode(page)
|
210
210
|
end
|
211
211
|
|
212
|
+
page
|
212
213
|
end
|
213
214
|
|
214
215
|
|
@@ -232,6 +233,10 @@ module MathMetadata
|
|
232
233
|
def join_article_authors( authors )
|
233
234
|
authors.collect { |author| URI.escape MathMetadata.normalize_name(author) }.join(';%20') || ''
|
234
235
|
end
|
236
|
+
|
237
|
+
def build_article_url(title, author, year)
|
238
|
+
self.class::ARTICLE_URL % [URI.escape(title), author, year]
|
239
|
+
end
|
235
240
|
|
236
241
|
def fetch_article( args={} )
|
237
242
|
opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)
|
@@ -242,7 +247,7 @@ module MathMetadata
|
|
242
247
|
title = '' if not title.kind_of?(String)
|
243
248
|
title = MathMetadata.normalize_text(title)
|
244
249
|
title = nwords(title) if @options[:nwords]
|
245
|
-
url = self.class::ARTICLE_URL % [URI.escape(title), author, opts[:year].to_s]
|
250
|
+
url = build_article_url title, author, opts[:year].to_s
|
246
251
|
end
|
247
252
|
|
248
253
|
fetch_page(url, opts)
|
data/lib/math_metadata_lookup/sites/bas-bg.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Bulgarian DML
|
7
|
+
# does not support author search
|
8
|
+
class BasBg < Site
|
9
|
+
ID = :basbg
|
10
|
+
NAME = "bas-bg"
|
11
|
+
URL = "http://sci-gems.math.bas.bg:8080/jspui/"
|
12
|
+
|
13
|
+
|
14
|
+
# AUTHOR_URL % "Author, Name"
|
15
|
+
AUTHOR_URL = %~~
|
16
|
+
|
17
|
+
AUTHORS_RE = %r{}mi
|
18
|
+
AUTHOR_RE = %r{}mi
|
19
|
+
|
20
|
+
|
21
|
+
ARTICLE_ID_URL = "http://sci-gems.math.bas.bg:8080/jspui/handle/%s"
|
22
|
+
ARTICLE_URL = "http://sci-gems.math.bas.bg:8080/jspui/simple-search?query=%s&from_advanced=true"
|
23
|
+
|
24
|
+
LIST_OF_ARTICLES_RE = %r{<table align="center" class="miscTable" summary="This table browses all dspace content">(.*?)</table>}mi
|
25
|
+
ARTICLE_ENTRY_RE = %r{<tr>.*?href="/jspui/handle/([^"]+)".*?</tr>}mi
|
26
|
+
#ARTICLE_ENTRY_RE = %r{<div class="headlineText">\s*<a href="/mathscinet/search/publdoc.html[^"]+">\s*<strong>\s*([^< ]+)\s*</strong>\s*<strong>}mi
|
27
|
+
|
28
|
+
ARTICLE_ID_RE = %r{<meta\s*name="DC.identifier"\s*content="http://hdl.handle.net/([^"]+)".*?/>}mi
|
29
|
+
ARTICLE_TITLE_RE = %r{<meta\s*name="dc.Title"\s*content="([^"]+)".*?/>}mi
|
30
|
+
ARTICLE_LANGUAGE_RE = %r{<meta\s*name="dc.language"\s*content="([^"]+)".*?/>}mi
|
31
|
+
ARTICLE_AUTHORS_RE = %r{<head>(.*?)</head>}mi
|
32
|
+
ARTICLE_AUTHOR_RE = %r{<meta\s* name="dc.creator"\s*content="([^"]+)".*?/>}mi
|
33
|
+
ARTICLE_MSCS_RE = %r{<meta.*?Classification:\s*(.*?)\s*".*?/>}mi
|
34
|
+
ARTICLE_MSC_RE = %r{([^,]+)}mi
|
35
|
+
ARTICLE_PUBLICATION_RE = %r{<tr>\s*<td\s*class="metadataFieldLabel">\s*Appears in Collections:\s*</td><td\s*class="metadataFieldValue">\s*<a href="[^"]*">\s*(.*?)\s*</a>.*?</tr>}mi
|
36
|
+
ARTICLE_PUBLISHER_RE = %r{<meta\s*name="DC.publisher"\s*content="([^"]+)".*?/>}mi
|
37
|
+
ARTICLE_RANGE_RE = %r{<tr>\s*<td class="label">\s*Pages:\s*</td>\s*<td\s*class="value">([^ <]+)</td>\s*</tr>}mi
|
38
|
+
ARTICLE_YEAR_RE = %r{td\s*class="metadataFieldLabel">\s*Issue Date:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
|
39
|
+
ARTICLE_ISSNS_RE = %r{<center><table\s*class="itemDisplayTable">(.*?)</table>}mi
|
40
|
+
ARTICLE_ISSN_RE = %r{<td\s*class="metadataFieldLabel">\s*ISSN:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
|
41
|
+
ARTICLE_KEYWORDS_RE = %r{<head>(.*?)</head>}mi
|
42
|
+
ARTICLE_KEYWORD_RE = %r{<meta\s*name="dc.subject"\s*content="([^"]+)".*?/>}mi
|
43
|
+
ARTICLE_REFERENCES_RE = %r{<table\s*xmlns:fn="http://www.w3.org/2003/11/xpath-functions"\s*class="dml_detail_view">(.*?)</table>}mi
|
44
|
+
ARTICLE_REFERENCE_RE = %r{<tr>\s*<td class="label">Reference:\s*</td>\s*<td class="value">\s*\[[^\]]+\]\s*([^<]+)</td>\s*</tr>}mi
|
45
|
+
|
46
|
+
def build_article_url(title, author, year)
|
47
|
+
prep_query = lambda{|prefix,str| str.to_s.split(/ +/).map{|t| "#{prefix}%3A#{URI.escape(t)}"}.join("+")}
|
48
|
+
query = "((%s)+AND+(%s))" % [ prep_query.call("title", title), prep_query.call("author", author)]
|
49
|
+
self.class::ARTICLE_URL % [query, author]
|
50
|
+
end
|
51
|
+
|
52
|
+
def join_article_authors( authors )
|
53
|
+
authors.join(" ")
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
data/lib/math_metadata_lookup/sites/cedram.rb
CHANGED
@@ -126,8 +126,7 @@ module MathMetadata
|
|
126
126
|
form["au_op"] = "and"
|
127
127
|
form["bibitems.text_op"] = "and"
|
128
128
|
|
129
|
-
url =
|
130
|
-
# url = self.class::ARTICLE_URL
|
129
|
+
url = build_article_url title, author, opts[:year].to_s
|
131
130
|
else
|
132
131
|
return fetch_page(url, opts)
|
133
132
|
end
|
data/math_metadata_lookup.gemspec
CHANGED
@@ -13,12 +13,13 @@ require 'find'
|
|
13
13
|
s.email = "pejuko@gmail.com"
|
14
14
|
s.authors = ["Petr Kovar"]
|
15
15
|
s.name = 'math_metadata_lookup'
|
16
|
-
s.version = '0.2.0'
|
16
|
+
s.version = '0.2.1'
|
17
17
|
s.date = Time.now.strftime("%Y-%m-%d")
|
18
18
|
s.add_dependency('unicode')
|
19
19
|
s.add_dependency('unidecoder')
|
20
20
|
s.add_dependency('ya2yaml')
|
21
21
|
s.add_dependency('json')
|
22
|
+
s.add_dependency('htmlentities')
|
22
23
|
s.require_path = 'lib'
|
23
24
|
s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
|
24
25
|
s.files += Dir["lib/**/*.rb", "resources/*"]
|
metadata
CHANGED
@@ -1,86 +1,83 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: math_metadata_lookup
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 0
|
9
|
-
version: 0.2.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Petr Kovar
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-02-24 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: unicode
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &21417260 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
- 0
|
30
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
31
22
|
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: unidecoder
|
35
23
|
prerelease: false
|
36
|
-
|
24
|
+
version_requirements: *21417260
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: unidecoder
|
27
|
+
requirement: &21416480 !ruby/object:Gem::Requirement
|
37
28
|
none: false
|
38
|
-
requirements:
|
39
|
-
- -
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
- 0
|
43
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
44
33
|
type: :runtime
|
45
|
-
version_requirements: *id002
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: ya2yaml
|
48
34
|
prerelease: false
|
49
|
-
|
35
|
+
version_requirements: *21416480
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ya2yaml
|
38
|
+
requirement: &21415880 !ruby/object:Gem::Requirement
|
50
39
|
none: false
|
51
|
-
requirements:
|
52
|
-
- -
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
|
55
|
-
- 0
|
56
|
-
version: "0"
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
57
44
|
type: :runtime
|
58
|
-
|
59
|
-
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *21415880
|
47
|
+
- !ruby/object:Gem::Dependency
|
60
48
|
name: json
|
49
|
+
requirement: &21415320 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
61
56
|
prerelease: false
|
62
|
-
|
57
|
+
version_requirements: *21415320
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: htmlentities
|
60
|
+
requirement: &21414800 !ruby/object:Gem::Requirement
|
63
61
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
|
68
|
-
- 0
|
69
|
-
version: "0"
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
70
66
|
type: :runtime
|
71
|
-
|
72
|
-
|
73
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *21414800
|
69
|
+
description: ! 'This utility/library search mathematical reviews sites and fetches
|
70
|
+
metadata about articles.
|
71
|
+
|
74
72
|
It can return results as one of text, xml, html, yaml, json or ruby formats.
|
75
73
|
|
74
|
+
'
|
76
75
|
email: pejuko@gmail.com
|
77
|
-
executables:
|
76
|
+
executables:
|
78
77
|
- math_metadata_lookup
|
79
78
|
extensions: []
|
80
|
-
|
81
79
|
extra_rdoc_files: []
|
82
|
-
|
83
|
-
files:
|
80
|
+
files:
|
84
81
|
- bin/math_metadata_lookup
|
85
82
|
- README.md
|
86
83
|
- math_metadata_lookup.gemspec
|
@@ -92,6 +89,7 @@ files:
|
|
92
89
|
- lib/math_metadata_lookup/tools.rb
|
93
90
|
- lib/math_metadata_lookup/lookup.rb
|
94
91
|
- lib/math_metadata_lookup/result.rb
|
92
|
+
- lib/math_metadata_lookup/sites/bas-bg.rb
|
95
93
|
- lib/math_metadata_lookup/sites/numdam.rb
|
96
94
|
- lib/math_metadata_lookup/sites/mr.rb
|
97
95
|
- lib/math_metadata_lookup/sites/zbl.rb
|
@@ -101,37 +99,28 @@ files:
|
|
101
99
|
- lib/math_metadata_lookup/entity.rb
|
102
100
|
- lib/math_metadata_lookup/author.rb
|
103
101
|
- resources/math_metadata_lookup.js
|
104
|
-
has_rdoc: true
|
105
102
|
homepage: http://github.com/pejuko/math_metadata_lookup
|
106
103
|
licenses: []
|
107
|
-
|
108
104
|
post_install_message:
|
109
105
|
rdoc_options: []
|
110
|
-
|
111
|
-
require_paths:
|
106
|
+
require_paths:
|
112
107
|
- lib
|
113
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
109
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
|
119
|
-
|
120
|
-
version: "0"
|
121
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
115
|
none: false
|
123
|
-
requirements:
|
124
|
-
- -
|
125
|
-
- !ruby/object:Gem::Version
|
126
|
-
|
127
|
-
- 0
|
128
|
-
version: "0"
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
129
120
|
requirements: []
|
130
|
-
|
131
121
|
rubyforge_project:
|
132
|
-
rubygems_version: 1.
|
122
|
+
rubygems_version: 1.8.10
|
133
123
|
signing_key:
|
134
124
|
specification_version: 3
|
135
125
|
summary: Search mathematical reviews sites and fetches metadata about articles.
|
136
126
|
test_files: []
|
137
|
-
|