math_metadata_lookup 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +9 -8
- data/Rakefile +2 -0
- data/bin/math_metadata_lookup +1 -1
- data/lib/math_metadata_lookup/result.rb +15 -0
- data/lib/math_metadata_lookup/site.rb +6 -1
- data/lib/math_metadata_lookup/sites/bas-bg.rb +58 -0
- data/lib/math_metadata_lookup/sites/cedram.rb +1 -2
- data/lib/math_metadata_lookup/tools.rb +1 -1
- data/math_metadata_lookup.gemspec +2 -1
- metadata +68 -79
data/README.md
CHANGED
@@ -5,14 +5,15 @@ This utility search mathematical reviews sites and fetches metadata about articl
|
|
5
5
|
It returns results as one of text, xml, html, yaml, json or ruby formats.
|
6
6
|
It can work with LaTeX accent notation.
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
* MathSciNet
|
12
|
-
* Zentralblatt
|
13
|
-
* Numdam
|
14
|
-
* Cedram
|
15
|
-
* DmlCZ
|
8
|
+
Supported sites
|
9
|
+
===============
|
10
|
+
|
11
|
+
* MathSciNet (mr)
|
12
|
+
* Zentralblatt (zbl)
|
13
|
+
* Numdam (numdam)
|
14
|
+
* Cedram (cedram)
|
15
|
+
* DmlCZ (dmlcz)
|
16
|
+
* Bulgarian DML (basbg)
|
16
17
|
|
17
18
|
|
18
19
|
Installation
|
data/Rakefile
CHANGED
@@ -6,6 +6,7 @@ $KCODE='UTF8' if RUBY_VERSION < "1.9"
|
|
6
6
|
|
7
7
|
require 'rake/gempackagetask'
|
8
8
|
require 'rake/clean'
|
9
|
+
require 'rspec/core/rake_task'
|
9
10
|
|
10
11
|
CLEAN << "coverage" << "pkg" << "README.html" << "CHANGELOG.html" << '*.rbc' << "html/" << "yardoc/"
|
11
12
|
|
@@ -17,6 +18,7 @@ task :gem do |t|
|
|
17
18
|
builder.build
|
18
19
|
end
|
19
20
|
|
21
|
+
RSpec::Core::RakeTask.new(:spec)
|
20
22
|
|
21
23
|
docs = []
|
22
24
|
|
data/bin/math_metadata_lookup
CHANGED
@@ -46,7 +46,7 @@ def print_help
|
|
46
46
|
|
47
47
|
|
48
48
|
common options:
|
49
|
-
--site, -s <mr,zbl,dmlcz,cedram,numdam>
|
49
|
+
--site, -s <mr,zbl,dmlcz,cedram,numdam,basbg> -- repeatable, sites to search on, default: all
|
50
50
|
--format, -f <text|html|xml|ruby|yaml|json> -- output format, default: text
|
51
51
|
--verbose, -v
|
52
52
|
|
@@ -51,6 +51,21 @@ module MathMetadata
|
|
51
51
|
end
|
52
52
|
|
53
53
|
|
54
|
+
def size
|
55
|
+
@metadata.size
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
def num_results
|
60
|
+
@metadata.inject(0){|s,m| s += m[:result].size}
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
def results
|
65
|
+
@metadata.inject([]){|s,m| s << m[:result]}.flatten
|
66
|
+
end
|
67
|
+
|
68
|
+
|
54
69
|
def format( f=:ruby )
|
55
70
|
self.send "to_#{f}"
|
56
71
|
end
|
@@ -209,6 +209,7 @@ module MathMetadata
|
|
209
209
|
page = coder.decode(page)
|
210
210
|
end
|
211
211
|
|
212
|
+
page
|
212
213
|
end
|
213
214
|
|
214
215
|
|
@@ -232,6 +233,10 @@ module MathMetadata
|
|
232
233
|
def join_article_authors( authors )
|
233
234
|
authors.collect { |author| URI.escape MathMetadata.normalize_name(author) }.join(';%20') || ''
|
234
235
|
end
|
236
|
+
|
237
|
+
def build_article_url(title, author, year)
|
238
|
+
self.class::ARTICLE_URL % [URI.escape(title), author, year]
|
239
|
+
end
|
235
240
|
|
236
241
|
def fetch_article( args={} )
|
237
242
|
opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)
|
@@ -242,7 +247,7 @@ module MathMetadata
|
|
242
247
|
title = '' if not title.kind_of?(String)
|
243
248
|
title = MathMetadata.normalize_text(title)
|
244
249
|
title = nwords(title) if @options[:nwords]
|
245
|
-
url =
|
250
|
+
url = build_article_url title, author, opts[:year].to_s
|
246
251
|
end
|
247
252
|
|
248
253
|
fetch_page(url, opts)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*-: coding: utf-8 -*-
|
2
|
+
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
|
+
|
4
|
+
module MathMetadata
|
5
|
+
|
6
|
+
# Bulgarian DML
|
7
|
+
# does not support author search
|
8
|
+
class BasBg < Site
|
9
|
+
ID = :basbg
|
10
|
+
NAME = "bas-bg"
|
11
|
+
URL = "http://sci-gems.math.bas.bg:8080/jspui/"
|
12
|
+
|
13
|
+
|
14
|
+
# AUTHOR_URL % "Author, Name"
|
15
|
+
AUTHOR_URL = %~~
|
16
|
+
|
17
|
+
AUTHORS_RE = %r{}mi
|
18
|
+
AUTHOR_RE = %r{}mi
|
19
|
+
|
20
|
+
|
21
|
+
ARTICLE_ID_URL = "http://sci-gems.math.bas.bg:8080/jspui/handle/%s"
|
22
|
+
ARTICLE_URL = "http://sci-gems.math.bas.bg:8080/jspui/simple-search?query=%s&from_advanced=true"
|
23
|
+
|
24
|
+
LIST_OF_ARTICLES_RE = %r{<table align="center" class="miscTable" summary="This table browses all dspace content">(.*?)</table>}mi
|
25
|
+
ARTICLE_ENTRY_RE = %r{<tr>.*?href="/jspui/handle/([^"]+)".*?</tr>}mi
|
26
|
+
#ARTICLE_ENTRY_RE = %r{<div class="headlineText">\s*<a href="/mathscinet/search/publdoc.html[^"]+">\s*<strong>\s*([^< ]+)\s*</strong>\s*<strong>}mi
|
27
|
+
|
28
|
+
ARTICLE_ID_RE = %r{<meta\s*name="DC.identifier"\s*content="http://hdl.handle.net/([^"]+)".*?/>}mi
|
29
|
+
ARTICLE_TITLE_RE = %r{<meta\s*name="dc.Title"\s*content="([^"]+)".*?/>}mi
|
30
|
+
ARTICLE_LANGUAGE_RE = %r{<meta\s*name="dc.language"\s*content="([^"]+)".*?/>}mi
|
31
|
+
ARTICLE_AUTHORS_RE = %r{<head>(.*?)</head>}mi
|
32
|
+
ARTICLE_AUTHOR_RE = %r{<meta\s* name="dc.creator"\s*content="([^"]+)".*?/>}mi
|
33
|
+
ARTICLE_MSCS_RE = %r{<meta.*?Classification:\s*(.*?)\s*".*?/>}mi
|
34
|
+
ARTICLE_MSC_RE = %r{([^,]+)}mi
|
35
|
+
ARTICLE_PUBLICATION_RE = %r{<tr>\s*<td\s*class="metadataFieldLabel">\s*Appears in Collections:\s*</td><td\s*class="metadataFieldValue">\s*<a href="[^"]*">\s*(.*?)\s*</a>.*?</tr>}mi
|
36
|
+
ARTICLE_PUBLISHER_RE = %r{<meta\s*name="DC.publisher"\s*content="([^"]+)".*?/>}mi
|
37
|
+
ARTICLE_RANGE_RE = %r{<tr>\s*<td class="label">\s*Pages:\s*</td>\s*<td\s*class="value">([^ <]+)</td>\s*</tr>}mi
|
38
|
+
ARTICLE_YEAR_RE = %r{td\s*class="metadataFieldLabel">\s*Issue Date:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
|
39
|
+
ARTICLE_ISSNS_RE = %r{<center><table\s*class="itemDisplayTable">(.*?)</table>}mi
|
40
|
+
ARTICLE_ISSN_RE = %r{<td\s*class="metadataFieldLabel">\s*ISSN:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
|
41
|
+
ARTICLE_KEYWORDS_RE = %r{<head>(.*?)</head>}mi
|
42
|
+
ARTICLE_KEYWORD_RE = %r{<meta\s*name="dc.subject"\s*content="([^"]+)".*?/>}mi
|
43
|
+
ARTICLE_REFERENCES_RE = %r{<table\s*xmlns:fn="http://www.w3.org/2003/11/xpath-functions"\s*class="dml_detail_view">(.*?)</table>}mi
|
44
|
+
ARTICLE_REFERENCE_RE = %r{<tr>\s*<td class="label">Reference:\s*</td>\s*<td class="value">\s*\[[^\]]+\]\s*([^<]+)</td>\s*</tr>}mi
|
45
|
+
|
46
|
+
def build_article_url(title, author, year)
|
47
|
+
prep_query = lambda{|prefix,str| str.to_s.split(/ +/).map{|t| "#{prefix}%3A#{URI.escape(t)}"}.join("+")}
|
48
|
+
query = "((%s)+AND+(%s))" % [ prep_query.call("title", title), prep_query.call("author", author)]
|
49
|
+
self.class::ARTICLE_URL % [query, author]
|
50
|
+
end
|
51
|
+
|
52
|
+
def join_article_authors( authors )
|
53
|
+
authors.join(" ")
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
@@ -126,8 +126,7 @@ module MathMetadata
|
|
126
126
|
form["au_op"] = "and"
|
127
127
|
form["bibitems.text_op"] = "and"
|
128
128
|
|
129
|
-
url =
|
130
|
-
# url = self.class::ARTICLE_URL
|
129
|
+
url = build_article_url title, author, opts[:year].to_s
|
131
130
|
else
|
132
131
|
return fetch_page(url, opts)
|
133
132
|
end
|
@@ -13,12 +13,13 @@ require 'find'
|
|
13
13
|
s.email = "pejuko@gmail.com"
|
14
14
|
s.authors = ["Petr Kovar"]
|
15
15
|
s.name = 'math_metadata_lookup'
|
16
|
-
s.version = '0.2.
|
16
|
+
s.version = '0.2.1'
|
17
17
|
s.date = Time.now.strftime("%Y-%m-%d")
|
18
18
|
s.add_dependency('unicode')
|
19
19
|
s.add_dependency('unidecoder')
|
20
20
|
s.add_dependency('ya2yaml')
|
21
21
|
s.add_dependency('json')
|
22
|
+
s.add_dependency('htmlentities')
|
22
23
|
s.require_path = 'lib'
|
23
24
|
s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
|
24
25
|
s.files += Dir["lib/**/*.rb", "resources/*"]
|
metadata
CHANGED
@@ -1,86 +1,83 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: math_metadata_lookup
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 0
|
9
|
-
version: 0.2.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Petr Kovar
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-02-24 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: unicode
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &21417260 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
- 0
|
30
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
31
22
|
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: unidecoder
|
35
23
|
prerelease: false
|
36
|
-
|
24
|
+
version_requirements: *21417260
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: unidecoder
|
27
|
+
requirement: &21416480 !ruby/object:Gem::Requirement
|
37
28
|
none: false
|
38
|
-
requirements:
|
39
|
-
- -
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
- 0
|
43
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
44
33
|
type: :runtime
|
45
|
-
version_requirements: *id002
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: ya2yaml
|
48
34
|
prerelease: false
|
49
|
-
|
35
|
+
version_requirements: *21416480
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ya2yaml
|
38
|
+
requirement: &21415880 !ruby/object:Gem::Requirement
|
50
39
|
none: false
|
51
|
-
requirements:
|
52
|
-
- -
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
|
55
|
-
- 0
|
56
|
-
version: "0"
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
57
44
|
type: :runtime
|
58
|
-
|
59
|
-
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *21415880
|
47
|
+
- !ruby/object:Gem::Dependency
|
60
48
|
name: json
|
49
|
+
requirement: &21415320 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
61
56
|
prerelease: false
|
62
|
-
|
57
|
+
version_requirements: *21415320
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: htmlentities
|
60
|
+
requirement: &21414800 !ruby/object:Gem::Requirement
|
63
61
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
|
68
|
-
- 0
|
69
|
-
version: "0"
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
70
66
|
type: :runtime
|
71
|
-
|
72
|
-
|
73
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *21414800
|
69
|
+
description: ! 'This utility/library search mathematical reviews sites and fetches
|
70
|
+
metadata about articles.
|
71
|
+
|
74
72
|
It can return results as one of text, xml, html, yaml, json or ruby formats.
|
75
73
|
|
74
|
+
'
|
76
75
|
email: pejuko@gmail.com
|
77
|
-
executables:
|
76
|
+
executables:
|
78
77
|
- math_metadata_lookup
|
79
78
|
extensions: []
|
80
|
-
|
81
79
|
extra_rdoc_files: []
|
82
|
-
|
83
|
-
files:
|
80
|
+
files:
|
84
81
|
- bin/math_metadata_lookup
|
85
82
|
- README.md
|
86
83
|
- math_metadata_lookup.gemspec
|
@@ -92,6 +89,7 @@ files:
|
|
92
89
|
- lib/math_metadata_lookup/tools.rb
|
93
90
|
- lib/math_metadata_lookup/lookup.rb
|
94
91
|
- lib/math_metadata_lookup/result.rb
|
92
|
+
- lib/math_metadata_lookup/sites/bas-bg.rb
|
95
93
|
- lib/math_metadata_lookup/sites/numdam.rb
|
96
94
|
- lib/math_metadata_lookup/sites/mr.rb
|
97
95
|
- lib/math_metadata_lookup/sites/zbl.rb
|
@@ -101,37 +99,28 @@ files:
|
|
101
99
|
- lib/math_metadata_lookup/entity.rb
|
102
100
|
- lib/math_metadata_lookup/author.rb
|
103
101
|
- resources/math_metadata_lookup.js
|
104
|
-
has_rdoc: true
|
105
102
|
homepage: http://github.com/pejuko/math_metadata_lookup
|
106
103
|
licenses: []
|
107
|
-
|
108
104
|
post_install_message:
|
109
105
|
rdoc_options: []
|
110
|
-
|
111
|
-
require_paths:
|
106
|
+
require_paths:
|
112
107
|
- lib
|
113
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
109
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
|
119
|
-
|
120
|
-
version: "0"
|
121
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
115
|
none: false
|
123
|
-
requirements:
|
124
|
-
- -
|
125
|
-
- !ruby/object:Gem::Version
|
126
|
-
|
127
|
-
- 0
|
128
|
-
version: "0"
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
129
120
|
requirements: []
|
130
|
-
|
131
121
|
rubyforge_project:
|
132
|
-
rubygems_version: 1.
|
122
|
+
rubygems_version: 1.8.10
|
133
123
|
signing_key:
|
134
124
|
specification_version: 3
|
135
125
|
summary: Search mathematical reviews sites and fetches metadata about articles.
|
136
126
|
test_files: []
|
137
|
-
|