math_metadata_lookup 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +40 -0
- data/TODO +5 -3
- data/bin/math_metadata_lookup +2 -2
- data/lib/math_metadata_lookup/result.rb +6 -2
- data/lib/math_metadata_lookup/site.rb +0 -1
- data/lib/math_metadata_lookup/tools.rb +4 -3
- data/math_metadata_lookup.gemspec +4 -3
- data/resources/math_metadata_lookup.js +4 -0
- metadata +20 -8
data/README.md
CHANGED
@@ -5,6 +5,12 @@ This utility search mathematical reviews sites and fetches metadata about articl
|
|
5
5
|
It returns results as one of text, xml, html, yaml or ruby formats.
|
6
6
|
|
7
7
|
|
8
|
+
Installation
|
9
|
+
------------
|
10
|
+
|
11
|
+
gem install math_metadata_lookup
|
12
|
+
|
13
|
+
|
8
14
|
Command line usage example
|
9
15
|
--------------------------
|
10
16
|
|
@@ -56,14 +62,48 @@ Hash arguments are:
|
|
56
62
|
* article id is unknown
|
57
63
|
* **:title** String
|
58
64
|
* **:authors** Array of strings
|
65
|
+
* **:year**
|
59
66
|
|
60
67
|
Returns instance of class Result.
|
61
68
|
|
62
69
|
|
63
70
|
#Lookup#author( hash )
|
64
71
|
|
72
|
+
Search for authors "name forms".
|
73
|
+
|
65
74
|
Hash arguments are:
|
66
75
|
|
67
76
|
* **:name** String. Author name.
|
68
77
|
|
69
78
|
Returns instance of class Result.
|
79
|
+
|
80
|
+
|
81
|
+
#Lookup#heuristic( hash )
|
82
|
+
|
83
|
+
Returns only one best match from each site where similarity is higher than the threshold.
|
84
|
+
It runs an article search with the first two words from the title and only the surnames from the author names.
|
85
|
+
The result of the search is sorted by similarity, and articles with similarity less than the threshold are deleted.
|
86
|
+
Similarity is computed as a weighted average of the title, authors and year similarities using the Levenshtein distance method.
|
87
|
+
The Levenshtein distance function is run on full given title and full given names.
|
88
|
+
|
89
|
+
Hash arguments are:
|
90
|
+
|
91
|
+
* **:title** String
|
92
|
+
* **:author** Array of strings
|
93
|
+
* **:year** String
|
94
|
+
* **:threshold** Float. Range: 0.0...1.0. Default: 0.6
|
95
|
+
|
96
|
+
Returns instance of class Result.
|
97
|
+
|
98
|
+
|
99
|
+
#Lookup#reference( hash )
|
100
|
+
|
101
|
+
Parse reference string and run heuristic.
|
102
|
+
|
103
|
+
Hash arguments are:
|
104
|
+
|
105
|
+
* **:reference** String
|
106
|
+
* **:threshold** Float. Range: 0.0...1.0. Default: 0.6
|
107
|
+
|
108
|
+
Returns instance of class Result.
|
109
|
+
|
data/TODO
CHANGED
@@ -2,9 +2,7 @@ Tasks
|
|
2
2
|
=====
|
3
3
|
|
4
4
|
o cache
|
5
|
-
|
6
|
-
x joining results
|
7
|
-
- guess relevant article if possible
|
5
|
+
o tests
|
8
6
|
|
9
7
|
|
10
8
|
Completed
|
@@ -13,3 +11,7 @@ Completed
|
|
13
11
|
* year argument
|
14
12
|
* support for references
|
15
13
|
* xml output
|
14
|
+
* heuristic
|
15
|
+
x joining results
|
16
|
+
* guess relevant article if possible
|
17
|
+
|
data/bin/math_metadata_lookup
CHANGED
@@ -9,6 +9,7 @@ $:.unshift File.expand_path(File.join(File.dirname(__FILE__), "../lib"))
|
|
9
9
|
def print_help
|
10
10
|
puts "
|
11
11
|
Search mathematical reviews sites and fetches metadata about articles.
|
12
|
+
https://github.com/pejuko/math_metadata_lookup
|
12
13
|
|
13
14
|
#{$0} <command> <options>
|
14
15
|
|
@@ -30,7 +31,7 @@ def print_help
|
|
30
31
|
|
31
32
|
|
32
33
|
heuristic -- returns only one best match from each site where
|
33
|
-
similarity is higher
|
34
|
+
similarity is higher then threshold
|
34
35
|
|
35
36
|
--title, -t <title>
|
36
37
|
--author, -a <author> -- repeatable option
|
@@ -60,7 +61,6 @@ def print_help
|
|
60
61
|
end
|
61
62
|
|
62
63
|
require 'pp'
|
63
|
-
require 'yaml'
|
64
64
|
require 'rubygems'
|
65
65
|
require 'math_metadata_lookup'
|
66
66
|
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# -*-: coding: utf-8 -*-
|
2
2
|
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
3
|
|
4
|
+
require 'ya2yaml'
|
5
|
+
|
4
6
|
module MathMetadata
|
5
7
|
|
6
8
|
class Result
|
@@ -33,10 +35,11 @@ module MathMetadata
|
|
33
35
|
def to_html
|
34
36
|
result = ""
|
35
37
|
@metadata.each do |site|
|
38
|
+
next unless site[:result]
|
36
39
|
result << %~
|
37
40
|
<div class="site">
|
38
41
|
<h3>Site: #{site[:name]}</h3>~
|
39
|
-
site[:result].each do |entity|
|
42
|
+
site[:result].to_a.each do |entity|
|
40
43
|
result << entity.to_html
|
41
44
|
end
|
42
45
|
result << %~</div>~
|
@@ -51,6 +54,7 @@ module MathMetadata
|
|
51
54
|
result << %~<?xml version="1.0" encoding="utf-8"?>
|
52
55
|
<mml>~
|
53
56
|
@metadata.each do |site|
|
57
|
+
next unless site[:result]
|
54
58
|
result << %~
|
55
59
|
<site name="#{site[:name]}">~
|
56
60
|
site[:result].each do |entity|
|
@@ -67,7 +71,7 @@ module MathMetadata
|
|
67
71
|
|
68
72
|
|
69
73
|
def to_yaml
|
70
|
-
@metadata.
|
74
|
+
@metadata.ya2yaml
|
71
75
|
end
|
72
76
|
|
73
77
|
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
3
|
|
4
4
|
require 'unicode'
|
5
|
+
require 'unidecoder'
|
5
6
|
|
6
7
|
module MathMetadata
|
7
8
|
|
@@ -50,7 +51,7 @@ module MathMetadata
|
|
50
51
|
def normalize_name( name )
|
51
52
|
# only latin chars
|
52
53
|
trans = latex_to_utf8(name.to_s)
|
53
|
-
trans =
|
54
|
+
trans = trans.to_ascii
|
54
55
|
|
55
56
|
# remove Jr.
|
56
57
|
trans.sub! %r{\bjr\.(\b|$)}i, ' '
|
@@ -74,8 +75,8 @@ module MathMetadata
|
|
74
75
|
|
75
76
|
|
76
77
|
def normalize_text( s )
|
77
|
-
str = latex_to_utf8(s)
|
78
|
-
str =
|
78
|
+
str = latex_to_utf8(s.to_s)
|
79
|
+
str = str.to_ascii.downcase
|
79
80
|
str = remove_punctuation(str)
|
80
81
|
str.gsub!(%r{\W+}, ' ')
|
81
82
|
str.gsub!(%r{(?:the|a|of|)\s+}i, ' ')
|
@@ -13,13 +13,14 @@ spec = Gem::Specification.new do |s|
|
|
13
13
|
s.email = "pejuko@gmail.com"
|
14
14
|
s.authors = ["Petr Kovar"]
|
15
15
|
s.name = 'math_metadata_lookup'
|
16
|
-
s.version = '0.1.
|
16
|
+
s.version = '0.1.2'
|
17
17
|
s.date = Time.now.strftime("%Y-%m-%d")
|
18
|
-
s.add_dependency('i18n', '>= 0.5.0')
|
19
18
|
s.add_dependency('unicode')
|
19
|
+
s.add_dependency('unidecoder')
|
20
|
+
s.add_dependency('ya2yaml')
|
20
21
|
s.require_path = 'lib'
|
21
22
|
s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
|
22
|
-
s.files += Dir["lib/**/*.rb"]
|
23
|
+
s.files += Dir["lib/**/*.rb", "resources/*"]
|
23
24
|
s.executables = ["math_metadata_lookup"]
|
24
25
|
s.description = <<EOF
|
25
26
|
This utility/library search mathematical reviews sites and fetches metadata about articles.
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 2
|
9
|
+
version: 0.1.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Petr Kovar
|
@@ -14,11 +14,11 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-27 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
21
|
+
name: unicode
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
24
|
none: false
|
@@ -27,13 +27,11 @@ dependencies:
|
|
27
27
|
- !ruby/object:Gem::Version
|
28
28
|
segments:
|
29
29
|
- 0
|
30
|
-
|
31
|
-
- 0
|
32
|
-
version: 0.5.0
|
30
|
+
version: "0"
|
33
31
|
type: :runtime
|
34
32
|
version_requirements: *id001
|
35
33
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
34
|
+
name: unidecoder
|
37
35
|
prerelease: false
|
38
36
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
37
|
none: false
|
@@ -45,6 +43,19 @@ dependencies:
|
|
45
43
|
version: "0"
|
46
44
|
type: :runtime
|
47
45
|
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: ya2yaml
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id003
|
48
59
|
description: |
|
49
60
|
This utility/library search mathematical reviews sites and fetches metadata about articles.
|
50
61
|
It can return results as one of text, xml, html, yaml or ruby formats.
|
@@ -73,6 +84,7 @@ files:
|
|
73
84
|
- lib/math_metadata_lookup/reference.rb
|
74
85
|
- lib/math_metadata_lookup/entity.rb
|
75
86
|
- lib/math_metadata_lookup/author.rb
|
87
|
+
- resources/math_metadata_lookup.js
|
76
88
|
has_rdoc: true
|
77
89
|
homepage: http://github.com/pejuko/math_metadata_lookup
|
78
90
|
licenses: []
|