math_metadata_lookup 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -5,14 +5,15 @@ This utility search mathematical reviews sites and fetches metadata about articl
5
5
  It returns results as one of text, xml, html, yaml, json or ruby formats.
6
6
  It can work with LaTeX accent notation.
7
7
 
8
- Sites
9
- =====
10
-
11
- * MathSciNet
12
- * Zentralblatt
13
- * Numdam
14
- * Cedram
15
- * DmlCZ
8
+ Supported sites
9
+ ===============
10
+
11
+ * MathSciNet (mr)
12
+ * Zentralblatt (zbl)
13
+ * Numdam (numdam)
14
+ * Cedram (cedram)
15
+ * DmlCZ (dmlcz)
16
+ * Bulgarian DML (basbg)
16
17
 
17
18
 
18
19
  Installation
data/Rakefile CHANGED
@@ -6,6 +6,7 @@ $KCODE='UTF8' if RUBY_VERSION < "1.9"
6
6
 
7
7
  require 'rake/gempackagetask'
8
8
  require 'rake/clean'
9
+ require 'rspec/core/rake_task'
9
10
 
10
11
  CLEAN << "coverage" << "pkg" << "README.html" << "CHANGELOG.html" << '*.rbc' << "html/" << "yardoc/"
11
12
 
@@ -17,6 +18,7 @@ task :gem do |t|
17
18
  builder.build
18
19
  end
19
20
 
21
+ RSpec::Core::RakeTask.new(:spec)
20
22
 
21
23
  docs = []
22
24
 
@@ -46,7 +46,7 @@ def print_help
46
46
 
47
47
 
48
48
  common options:
49
- --site, -s <mr,zbl,dmlcz,cedram,numdam> -- repeatable, sites to search on, default: all
49
+ --site, -s <mr,zbl,dmlcz,cedram,numdam,basbg> -- repeatable, sites to search on, default: all
50
50
  --format, -f <text|html|xml|ruby|yaml|json> -- output format, default: text
51
51
  --verbose, -v
52
52
 
@@ -51,6 +51,21 @@ module MathMetadata
51
51
  end
52
52
 
53
53
 
54
+ def size
55
+ @metadata.size
56
+ end
57
+
58
+
59
+ def num_results
60
+ @metadata.inject(0){|s,m| s += m[:result].size}
61
+ end
62
+
63
+
64
+ def results
65
+ @metadata.inject([]){|s,m| s << m[:result]}.flatten
66
+ end
67
+
68
+
54
69
  def format( f=:ruby )
55
70
  self.send "to_#{f}"
56
71
  end
@@ -209,6 +209,7 @@ module MathMetadata
209
209
  page = coder.decode(page)
210
210
  end
211
211
 
212
+ page
212
213
  end
213
214
 
214
215
 
@@ -232,6 +233,10 @@ module MathMetadata
232
233
  def join_article_authors( authors )
233
234
  authors.collect { |author| URI.escape MathMetadata.normalize_name(author) }.join(';%20') || ''
234
235
  end
236
+
237
+ def build_article_url(title, author, year)
238
+ self.class::ARTICLE_URL % [URI.escape(title), author, year]
239
+ end
235
240
 
236
241
  def fetch_article( args={} )
237
242
  opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)
@@ -242,7 +247,7 @@ module MathMetadata
242
247
  title = '' if not title.kind_of?(String)
243
248
  title = MathMetadata.normalize_text(title)
244
249
  title = nwords(title) if @options[:nwords]
245
- url = self.class::ARTICLE_URL % [URI.escape(title), author, opts[:year].to_s]
250
+ url = build_article_url title, author, opts[:year].to_s
246
251
  end
247
252
 
248
253
  fetch_page(url, opts)
@@ -0,0 +1,58 @@
1
+ # -*-: coding: utf-8 -*-
2
+ # vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
3
+
4
+ module MathMetadata
5
+
6
+ # Bulgarian DML
7
+ # does not support author search
8
+ class BasBg < Site
9
+ ID = :basbg
10
+ NAME = "bas-bg"
11
+ URL = "http://sci-gems.math.bas.bg:8080/jspui/"
12
+
13
+
14
+ # AUTHOR_URL % "Author, Name"
15
+ AUTHOR_URL = %~~
16
+
17
+ AUTHORS_RE = %r{}mi
18
+ AUTHOR_RE = %r{}mi
19
+
20
+
21
+ ARTICLE_ID_URL = "http://sci-gems.math.bas.bg:8080/jspui/handle/%s"
22
+ ARTICLE_URL = "http://sci-gems.math.bas.bg:8080/jspui/simple-search?query=%s&from_advanced=true"
23
+
24
+ LIST_OF_ARTICLES_RE = %r{<table align="center" class="miscTable" summary="This table browses all dspace content">(.*?)</table>}mi
25
+ ARTICLE_ENTRY_RE = %r{<tr>.*?href="/jspui/handle/([^"]+)".*?</tr>}mi
26
+ #ARTICLE_ENTRY_RE = %r{<div class="headlineText">\s*<a href="/mathscinet/search/publdoc.html[^"]+">\s*<strong>\s*([^< ]+)\s*</strong>\s*<strong>}mi
27
+
28
+ ARTICLE_ID_RE = %r{<meta\s*name="DC.identifier"\s*content="http://hdl.handle.net/([^"]+)".*?/>}mi
29
+ ARTICLE_TITLE_RE = %r{<meta\s*name="dc.Title"\s*content="([^"]+)".*?/>}mi
30
+ ARTICLE_LANGUAGE_RE = %r{<meta\s*name="dc.language"\s*content="([^"]+)".*?/>}mi
31
+ ARTICLE_AUTHORS_RE = %r{<head>(.*?)</head>}mi
32
+ ARTICLE_AUTHOR_RE = %r{<meta\s* name="dc.creator"\s*content="([^"]+)".*?/>}mi
33
+ ARTICLE_MSCS_RE = %r{<meta.*?Classification:\s*(.*?)\s*".*?/>}mi
34
+ ARTICLE_MSC_RE = %r{([^,]+)}mi
35
+ ARTICLE_PUBLICATION_RE = %r{<tr>\s*<td\s*class="metadataFieldLabel">\s*Appears in Collections:\s*</td><td\s*class="metadataFieldValue">\s*<a href="[^"]*">\s*(.*?)\s*</a>.*?</tr>}mi
36
+ ARTICLE_PUBLISHER_RE = %r{<meta\s*name="DC.publisher"\s*content="([^"]+)".*?/>}mi
37
+ ARTICLE_RANGE_RE = %r{<tr>\s*<td class="label">\s*Pages:\s*</td>\s*<td\s*class="value">([^ <]+)</td>\s*</tr>}mi
38
+ ARTICLE_YEAR_RE = %r{td\s*class="metadataFieldLabel">\s*Issue Date:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
39
+ ARTICLE_ISSNS_RE = %r{<center><table\s*class="itemDisplayTable">(.*?)</table>}mi
40
+ ARTICLE_ISSN_RE = %r{<td\s*class="metadataFieldLabel">\s*ISSN:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
41
+ ARTICLE_KEYWORDS_RE = %r{<head>(.*?)</head>}mi
42
+ ARTICLE_KEYWORD_RE = %r{<meta\s*name="dc.subject"\s*content="([^"]+)".*?/>}mi
43
+ ARTICLE_REFERENCES_RE = %r{<table\s*xmlns:fn="http://www.w3.org/2003/11/xpath-functions"\s*class="dml_detail_view">(.*?)</table>}mi
44
+ ARTICLE_REFERENCE_RE = %r{<tr>\s*<td class="label">Reference:\s*</td>\s*<td class="value">\s*\[[^\]]+\]\s*([^<]+)</td>\s*</tr>}mi
45
+
46
+ def build_article_url(title, author, year)
47
+ prep_query = lambda{|prefix,str| str.to_s.split(/ +/).map{|t| "#{prefix}%3A#{URI.escape(t)}"}.join("+")}
48
+ query = "((%s)+AND+(%s))" % [ prep_query.call("title", title), prep_query.call("author", author)]
49
+ self.class::ARTICLE_URL % [query, author]
50
+ end
51
+
52
+ def join_article_authors( authors )
53
+ authors.join(" ")
54
+ end
55
+
56
+ end
57
+
58
+ end
@@ -126,8 +126,7 @@ module MathMetadata
126
126
  form["au_op"] = "and"
127
127
  form["bibitems.text_op"] = "and"
128
128
 
129
- url = self.class::ARTICLE_URL % [URI.escape(title), author, opts[:year].to_s]
130
- # url = self.class::ARTICLE_URL
129
+ url = build_article_url title, author, opts[:year].to_s
131
130
  else
132
131
  return fetch_page(url, opts)
133
132
  end
@@ -79,7 +79,7 @@ module MathMetadata
79
79
  str = str.to_ascii.downcase
80
80
  str = remove_punctuation(str)
81
81
  str.gsub!(%r{\W+}, ' ')
82
- str.gsub!(%r{(?:the|a|of|)\s+}i, ' ')
82
+ str.gsub(%r{(?: the| a| of|^a|^the|^of)\s+}i, ' ')
83
83
  str.gsub!(%r{\s+}, ' ')
84
84
  str.strip
85
85
  end
@@ -13,12 +13,13 @@ require 'find'
13
13
  s.email = "pejuko@gmail.com"
14
14
  s.authors = ["Petr Kovar"]
15
15
  s.name = 'math_metadata_lookup'
16
- s.version = '0.2.0'
16
+ s.version = '0.2.1'
17
17
  s.date = Time.now.strftime("%Y-%m-%d")
18
18
  s.add_dependency('unicode')
19
19
  s.add_dependency('unidecoder')
20
20
  s.add_dependency('ya2yaml')
21
21
  s.add_dependency('json')
22
+ s.add_dependency('htmlentities')
22
23
  s.require_path = 'lib'
23
24
  s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
24
25
  s.files += Dir["lib/**/*.rb", "resources/*"]
metadata CHANGED
@@ -1,86 +1,83 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: math_metadata_lookup
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 0
9
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Petr Kovar
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-06-10 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-24 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: unicode
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &21417260 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
31
22
  type: :runtime
32
- version_requirements: *id001
33
- - !ruby/object:Gem::Dependency
34
- name: unidecoder
35
23
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *21417260
25
+ - !ruby/object:Gem::Dependency
26
+ name: unidecoder
27
+ requirement: &21416480 !ruby/object:Gem::Requirement
37
28
  none: false
38
- requirements:
39
- - - ">="
40
- - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
44
33
  type: :runtime
45
- version_requirements: *id002
46
- - !ruby/object:Gem::Dependency
47
- name: ya2yaml
48
34
  prerelease: false
49
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *21416480
36
+ - !ruby/object:Gem::Dependency
37
+ name: ya2yaml
38
+ requirement: &21415880 !ruby/object:Gem::Requirement
50
39
  none: false
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- segments:
55
- - 0
56
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
57
44
  type: :runtime
58
- version_requirements: *id003
59
- - !ruby/object:Gem::Dependency
45
+ prerelease: false
46
+ version_requirements: *21415880
47
+ - !ruby/object:Gem::Dependency
60
48
  name: json
49
+ requirement: &21415320 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
61
56
  prerelease: false
62
- requirement: &id004 !ruby/object:Gem::Requirement
57
+ version_requirements: *21415320
58
+ - !ruby/object:Gem::Dependency
59
+ name: htmlentities
60
+ requirement: &21414800 !ruby/object:Gem::Requirement
63
61
  none: false
64
- requirements:
65
- - - ">="
66
- - !ruby/object:Gem::Version
67
- segments:
68
- - 0
69
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
70
66
  type: :runtime
71
- version_requirements: *id004
72
- description: |
73
- This utility/library search mathematical reviews sites and fetches metadata about articles.
67
+ prerelease: false
68
+ version_requirements: *21414800
69
+ description: ! 'This utility/library search mathematical reviews sites and fetches
70
+ metadata about articles.
71
+
74
72
  It can return results as one of text, xml, html, yaml, json or ruby formats.
75
73
 
74
+ '
76
75
  email: pejuko@gmail.com
77
- executables:
76
+ executables:
78
77
  - math_metadata_lookup
79
78
  extensions: []
80
-
81
79
  extra_rdoc_files: []
82
-
83
- files:
80
+ files:
84
81
  - bin/math_metadata_lookup
85
82
  - README.md
86
83
  - math_metadata_lookup.gemspec
@@ -92,6 +89,7 @@ files:
92
89
  - lib/math_metadata_lookup/tools.rb
93
90
  - lib/math_metadata_lookup/lookup.rb
94
91
  - lib/math_metadata_lookup/result.rb
92
+ - lib/math_metadata_lookup/sites/bas-bg.rb
95
93
  - lib/math_metadata_lookup/sites/numdam.rb
96
94
  - lib/math_metadata_lookup/sites/mr.rb
97
95
  - lib/math_metadata_lookup/sites/zbl.rb
@@ -101,37 +99,28 @@ files:
101
99
  - lib/math_metadata_lookup/entity.rb
102
100
  - lib/math_metadata_lookup/author.rb
103
101
  - resources/math_metadata_lookup.js
104
- has_rdoc: true
105
102
  homepage: http://github.com/pejuko/math_metadata_lookup
106
103
  licenses: []
107
-
108
104
  post_install_message:
109
105
  rdoc_options: []
110
-
111
- require_paths:
106
+ require_paths:
112
107
  - lib
113
- required_ruby_version: !ruby/object:Gem::Requirement
108
+ required_ruby_version: !ruby/object:Gem::Requirement
114
109
  none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
- version: "0"
121
- required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
122
115
  none: false
123
- requirements:
124
- - - ">="
125
- - !ruby/object:Gem::Version
126
- segments:
127
- - 0
128
- version: "0"
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
129
120
  requirements: []
130
-
131
121
  rubyforge_project:
132
- rubygems_version: 1.3.7
122
+ rubygems_version: 1.8.10
133
123
  signing_key:
134
124
  specification_version: 3
135
125
  summary: Search mathematical reviews sites and fetches metadata about articles.
136
126
  test_files: []
137
-