math_metadata_lookup 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -5,14 +5,15 @@ This utility search mathematical reviews sites and fetches metadata about articl
5
5
  It returns results as one of text, xml, html, yaml, json or ruby formats.
6
6
  It can work with LaTeX accent notation.
7
7
 
8
- Sites
9
- =====
10
-
11
- * MathSciNet
12
- * Zentralblatt
13
- * Numdam
14
- * Cedram
15
- * DmlCZ
8
+ Supported sites
9
+ ===============
10
+
11
+ * MathSciNet (mr)
12
+ * Zentralblatt (zbl)
13
+ * Numdam (numdam)
14
+ * Cedram (cedram)
15
+ * DmlCZ (dmlcz)
16
+ * Bulgarian DML (basbg)
16
17
 
17
18
 
18
19
  Installation
data/Rakefile CHANGED
@@ -6,6 +6,7 @@ $KCODE='UTF8' if RUBY_VERSION < "1.9"
6
6
 
7
7
  require 'rake/gempackagetask'
8
8
  require 'rake/clean'
9
+ require 'rspec/core/rake_task'
9
10
 
10
11
  CLEAN << "coverage" << "pkg" << "README.html" << "CHANGELOG.html" << '*.rbc' << "html/" << "yardoc/"
11
12
 
@@ -17,6 +18,7 @@ task :gem do |t|
17
18
  builder.build
18
19
  end
19
20
 
21
+ RSpec::Core::RakeTask.new(:spec)
20
22
 
21
23
  docs = []
22
24
 
@@ -46,7 +46,7 @@ def print_help
46
46
 
47
47
 
48
48
  common options:
49
- --site, -s <mr,zbl,dmlcz,cedram,numdam> -- repeatable, sites to search on, default: all
49
+ --site, -s <mr,zbl,dmlcz,cedram,numdam,basbg> -- repeatable, sites to search on, default: all
50
50
  --format, -f <text|html|xml|ruby|yaml|json> -- output format, default: text
51
51
  --verbose, -v
52
52
 
@@ -51,6 +51,21 @@ module MathMetadata
51
51
  end
52
52
 
53
53
 
54
+ def size
55
+ @metadata.size
56
+ end
57
+
58
+
59
+ def num_results
60
+ @metadata.inject(0){|s,m| s += m[:result].size}
61
+ end
62
+
63
+
64
+ def results
65
+ @metadata.inject([]){|s,m| s << m[:result]}.flatten
66
+ end
67
+
68
+
54
69
  def format( f=:ruby )
55
70
  self.send "to_#{f}"
56
71
  end
@@ -209,6 +209,7 @@ module MathMetadata
209
209
  page = coder.decode(page)
210
210
  end
211
211
 
212
+ page
212
213
  end
213
214
 
214
215
 
@@ -232,6 +233,10 @@ module MathMetadata
232
233
  def join_article_authors( authors )
233
234
  authors.collect { |author| URI.escape MathMetadata.normalize_name(author) }.join(';%20') || ''
234
235
  end
236
+
237
+ def build_article_url(title, author, year)
238
+ self.class::ARTICLE_URL % [URI.escape(title), author, year]
239
+ end
235
240
 
236
241
  def fetch_article( args={} )
237
242
  opts = {:id => nil, :title => "", :year => "", :authors => []}.merge(args)
@@ -242,7 +247,7 @@ module MathMetadata
242
247
  title = '' if not title.kind_of?(String)
243
248
  title = MathMetadata.normalize_text(title)
244
249
  title = nwords(title) if @options[:nwords]
245
- url = self.class::ARTICLE_URL % [URI.escape(title), author, opts[:year].to_s]
250
+ url = build_article_url title, author, opts[:year].to_s
246
251
  end
247
252
 
248
253
  fetch_page(url, opts)
@@ -0,0 +1,58 @@
1
+ # -*-: coding: utf-8 -*-
2
+ # vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
3
+
4
+ module MathMetadata
5
+
6
+ # Bulgarian DML
7
+ # does not support author search
8
+ class BasBg < Site
9
+ ID = :basbg
10
+ NAME = "bas-bg"
11
+ URL = "http://sci-gems.math.bas.bg:8080/jspui/"
12
+
13
+
14
+ # AUTHOR_URL % "Author, Name"
15
+ AUTHOR_URL = %~~
16
+
17
+ AUTHORS_RE = %r{}mi
18
+ AUTHOR_RE = %r{}mi
19
+
20
+
21
+ ARTICLE_ID_URL = "http://sci-gems.math.bas.bg:8080/jspui/handle/%s"
22
+ ARTICLE_URL = "http://sci-gems.math.bas.bg:8080/jspui/simple-search?query=%s&from_advanced=true"
23
+
24
+ LIST_OF_ARTICLES_RE = %r{<table align="center" class="miscTable" summary="This table browses all dspace content">(.*?)</table>}mi
25
+ ARTICLE_ENTRY_RE = %r{<tr>.*?href="/jspui/handle/([^"]+)".*?</tr>}mi
26
+ #ARTICLE_ENTRY_RE = %r{<div class="headlineText">\s*<a href="/mathscinet/search/publdoc.html[^"]+">\s*<strong>\s*([^< ]+)\s*</strong>\s*<strong>}mi
27
+
28
+ ARTICLE_ID_RE = %r{<meta\s*name="DC.identifier"\s*content="http://hdl.handle.net/([^"]+)".*?/>}mi
29
+ ARTICLE_TITLE_RE = %r{<meta\s*name="dc.Title"\s*content="([^"]+)".*?/>}mi
30
+ ARTICLE_LANGUAGE_RE = %r{<meta\s*name="dc.language"\s*content="([^"]+)".*?/>}mi
31
+ ARTICLE_AUTHORS_RE = %r{<head>(.*?)</head>}mi
32
+ ARTICLE_AUTHOR_RE = %r{<meta\s* name="dc.creator"\s*content="([^"]+)".*?/>}mi
33
+ ARTICLE_MSCS_RE = %r{<meta.*?Classification:\s*(.*?)\s*".*?/>}mi
34
+ ARTICLE_MSC_RE = %r{([^,]+)}mi
35
+ ARTICLE_PUBLICATION_RE = %r{<tr>\s*<td\s*class="metadataFieldLabel">\s*Appears in Collections:\s*</td><td\s*class="metadataFieldValue">\s*<a href="[^"]*">\s*(.*?)\s*</a>.*?</tr>}mi
36
+ ARTICLE_PUBLISHER_RE = %r{<meta\s*name="DC.publisher"\s*content="([^"]+)".*?/>}mi
37
+ ARTICLE_RANGE_RE = %r{<tr>\s*<td class="label">\s*Pages:\s*</td>\s*<td\s*class="value">([^ <]+)</td>\s*</tr>}mi
38
+ ARTICLE_YEAR_RE = %r{td\s*class="metadataFieldLabel">\s*Issue Date:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
39
+ ARTICLE_ISSNS_RE = %r{<center><table\s*class="itemDisplayTable">(.*?)</table>}mi
40
+ ARTICLE_ISSN_RE = %r{<td\s*class="metadataFieldLabel">\s*ISSN:.*?</td>\s*<td\s*class="metadataFieldValue">\s*(.*?)\s*</td>}mi
41
+ ARTICLE_KEYWORDS_RE = %r{<head>(.*?)</head>}mi
42
+ ARTICLE_KEYWORD_RE = %r{<meta\s*name="dc.subject"\s*content="([^"]+)".*?/>}mi
43
+ ARTICLE_REFERENCES_RE = %r{<table\s*xmlns:fn="http://www.w3.org/2003/11/xpath-functions"\s*class="dml_detail_view">(.*?)</table>}mi
44
+ ARTICLE_REFERENCE_RE = %r{<tr>\s*<td class="label">Reference:\s*</td>\s*<td class="value">\s*\[[^\]]+\]\s*([^<]+)</td>\s*</tr>}mi
45
+
46
+ def build_article_url(title, author, year)
47
+ prep_query = lambda{|prefix,str| str.to_s.split(/ +/).map{|t| "#{prefix}%3A#{URI.escape(t)}"}.join("+")}
48
+ query = "((%s)+AND+(%s))" % [ prep_query.call("title", title), prep_query.call("author", author)]
49
+ self.class::ARTICLE_URL % [query, author]
50
+ end
51
+
52
+ def join_article_authors( authors )
53
+ authors.join(" ")
54
+ end
55
+
56
+ end
57
+
58
+ end
@@ -126,8 +126,7 @@ module MathMetadata
126
126
  form["au_op"] = "and"
127
127
  form["bibitems.text_op"] = "and"
128
128
 
129
- url = self.class::ARTICLE_URL % [URI.escape(title), author, opts[:year].to_s]
130
- # url = self.class::ARTICLE_URL
129
+ url = build_article_url title, author, opts[:year].to_s
131
130
  else
132
131
  return fetch_page(url, opts)
133
132
  end
@@ -79,7 +79,7 @@ module MathMetadata
79
79
  str = str.to_ascii.downcase
80
80
  str = remove_punctuation(str)
81
81
  str.gsub!(%r{\W+}, ' ')
82
- str.gsub!(%r{(?:the|a|of|)\s+}i, ' ')
82
+ str.gsub(%r{(?: the| a| of|^a|^the|^of)\s+}i, ' ')
83
83
  str.gsub!(%r{\s+}, ' ')
84
84
  str.strip
85
85
  end
@@ -13,12 +13,13 @@ require 'find'
13
13
  s.email = "pejuko@gmail.com"
14
14
  s.authors = ["Petr Kovar"]
15
15
  s.name = 'math_metadata_lookup'
16
- s.version = '0.2.0'
16
+ s.version = '0.2.1'
17
17
  s.date = Time.now.strftime("%Y-%m-%d")
18
18
  s.add_dependency('unicode')
19
19
  s.add_dependency('unidecoder')
20
20
  s.add_dependency('ya2yaml')
21
21
  s.add_dependency('json')
22
+ s.add_dependency('htmlentities')
22
23
  s.require_path = 'lib'
23
24
  s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
24
25
  s.files += Dir["lib/**/*.rb", "resources/*"]
metadata CHANGED
@@ -1,86 +1,83 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: math_metadata_lookup
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 0
9
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Petr Kovar
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-06-10 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-24 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: unicode
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &21417260 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
31
22
  type: :runtime
32
- version_requirements: *id001
33
- - !ruby/object:Gem::Dependency
34
- name: unidecoder
35
23
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *21417260
25
+ - !ruby/object:Gem::Dependency
26
+ name: unidecoder
27
+ requirement: &21416480 !ruby/object:Gem::Requirement
37
28
  none: false
38
- requirements:
39
- - - ">="
40
- - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
44
33
  type: :runtime
45
- version_requirements: *id002
46
- - !ruby/object:Gem::Dependency
47
- name: ya2yaml
48
34
  prerelease: false
49
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *21416480
36
+ - !ruby/object:Gem::Dependency
37
+ name: ya2yaml
38
+ requirement: &21415880 !ruby/object:Gem::Requirement
50
39
  none: false
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- segments:
55
- - 0
56
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
57
44
  type: :runtime
58
- version_requirements: *id003
59
- - !ruby/object:Gem::Dependency
45
+ prerelease: false
46
+ version_requirements: *21415880
47
+ - !ruby/object:Gem::Dependency
60
48
  name: json
49
+ requirement: &21415320 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
61
56
  prerelease: false
62
- requirement: &id004 !ruby/object:Gem::Requirement
57
+ version_requirements: *21415320
58
+ - !ruby/object:Gem::Dependency
59
+ name: htmlentities
60
+ requirement: &21414800 !ruby/object:Gem::Requirement
63
61
  none: false
64
- requirements:
65
- - - ">="
66
- - !ruby/object:Gem::Version
67
- segments:
68
- - 0
69
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
70
66
  type: :runtime
71
- version_requirements: *id004
72
- description: |
73
- This utility/library search mathematical reviews sites and fetches metadata about articles.
67
+ prerelease: false
68
+ version_requirements: *21414800
69
+ description: ! 'This utility/library search mathematical reviews sites and fetches
70
+ metadata about articles.
71
+
74
72
  It can return results as one of text, xml, html, yaml, json or ruby formats.
75
73
 
74
+ '
76
75
  email: pejuko@gmail.com
77
- executables:
76
+ executables:
78
77
  - math_metadata_lookup
79
78
  extensions: []
80
-
81
79
  extra_rdoc_files: []
82
-
83
- files:
80
+ files:
84
81
  - bin/math_metadata_lookup
85
82
  - README.md
86
83
  - math_metadata_lookup.gemspec
@@ -92,6 +89,7 @@ files:
92
89
  - lib/math_metadata_lookup/tools.rb
93
90
  - lib/math_metadata_lookup/lookup.rb
94
91
  - lib/math_metadata_lookup/result.rb
92
+ - lib/math_metadata_lookup/sites/bas-bg.rb
95
93
  - lib/math_metadata_lookup/sites/numdam.rb
96
94
  - lib/math_metadata_lookup/sites/mr.rb
97
95
  - lib/math_metadata_lookup/sites/zbl.rb
@@ -101,37 +99,28 @@ files:
101
99
  - lib/math_metadata_lookup/entity.rb
102
100
  - lib/math_metadata_lookup/author.rb
103
101
  - resources/math_metadata_lookup.js
104
- has_rdoc: true
105
102
  homepage: http://github.com/pejuko/math_metadata_lookup
106
103
  licenses: []
107
-
108
104
  post_install_message:
109
105
  rdoc_options: []
110
-
111
- require_paths:
106
+ require_paths:
112
107
  - lib
113
- required_ruby_version: !ruby/object:Gem::Requirement
108
+ required_ruby_version: !ruby/object:Gem::Requirement
114
109
  none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- segments:
119
- - 0
120
- version: "0"
121
- required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
122
115
  none: false
123
- requirements:
124
- - - ">="
125
- - !ruby/object:Gem::Version
126
- segments:
127
- - 0
128
- version: "0"
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
129
120
  requirements: []
130
-
131
121
  rubyforge_project:
132
- rubygems_version: 1.3.7
122
+ rubygems_version: 1.8.10
133
123
  signing_key:
134
124
  specification_version: 3
135
125
  summary: Search mathematical reviews sites and fetches metadata about articles.
136
126
  test_files: []
137
-