math_metadata_lookup 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +40 -0
- data/TODO +5 -3
- data/bin/math_metadata_lookup +2 -2
- data/lib/math_metadata_lookup/result.rb +6 -2
- data/lib/math_metadata_lookup/site.rb +0 -1
- data/lib/math_metadata_lookup/tools.rb +4 -3
- data/math_metadata_lookup.gemspec +4 -3
- data/resources/math_metadata_lookup.js +4 -0
- metadata +20 -8
data/README.md
CHANGED
@@ -5,6 +5,12 @@ This utility search mathematical reviews sites and fetches metadata about articl
|
|
5
5
|
It returns results as one of text, xml, html, yaml or ruby formats.
|
6
6
|
|
7
7
|
|
8
|
+
Installation
|
9
|
+
------------
|
10
|
+
|
11
|
+
gem install math_metadata_lookup
|
12
|
+
|
13
|
+
|
8
14
|
Command line usage example
|
9
15
|
--------------------------
|
10
16
|
|
@@ -56,14 +62,48 @@ Hash arguments are:
|
|
56
62
|
* article id is unknown
|
57
63
|
* **:title** String
|
58
64
|
* **:authors** Array of strings
|
65
|
+
* **:year**
|
59
66
|
|
60
67
|
Returns instance of class Result.
|
61
68
|
|
62
69
|
|
63
70
|
#Lookup#author( hash )
|
64
71
|
|
72
|
+
Search for authors "name forms".
|
73
|
+
|
65
74
|
Hash arguments are:
|
66
75
|
|
67
76
|
* **:name** String. Author name.
|
68
77
|
|
69
78
|
Returns instance of class Result.
|
79
|
+
|
80
|
+
|
81
|
+
#Lookup#heuristic( hash )
|
82
|
+
|
83
|
+
Returns only the single best match from each site whose similarity is higher than the threshold.
|
84
|
+
It runs an article search with the first two words from the title and only the surnames from the author names.
|
85
|
+
The result of the search is sorted by similarity, and articles with similarity less than the threshold are deleted.
|
86
|
+
Similarity is computed as a weighted average of title, authors and year using the Levenshtein distance method.
|
87
|
+
The Levenshtein distance function is run on the full given title and the full given names.
|
88
|
+
|
89
|
+
Hash arguments are:
|
90
|
+
|
91
|
+
* **:title** String
|
92
|
+
* **:author** Array of strings
|
93
|
+
* **:year** String
|
94
|
+
* **:threshold** Float. Range: 0.0...1.0. Default: 0.6
|
95
|
+
|
96
|
+
Returns instance of class Result.
|
97
|
+
|
98
|
+
|
99
|
+
#Lookup#reference( hash )
|
100
|
+
|
101
|
+
Parse reference string and run heuristic.
|
102
|
+
|
103
|
+
Hash arguments are:
|
104
|
+
|
105
|
+
* **:reference** String
|
106
|
+
* **:threshold** Float. Range: 0.0...1.0. Default: 0.6
|
107
|
+
|
108
|
+
Returns instance of class Result.
|
109
|
+
|
data/TODO
CHANGED
@@ -2,9 +2,7 @@ Tasks
|
|
2
2
|
=====
|
3
3
|
|
4
4
|
o cache
|
5
|
-
|
6
|
-
x joining results
|
7
|
-
- guess relevant article if possible
|
5
|
+
o tests
|
8
6
|
|
9
7
|
|
10
8
|
Completed
|
@@ -13,3 +11,7 @@ Completed
|
|
13
11
|
* year argument
|
14
12
|
* support for references
|
15
13
|
* xml output
|
14
|
+
* heuristic
|
15
|
+
x joining results
|
16
|
+
* guess relevant article if possible
|
17
|
+
|
data/bin/math_metadata_lookup
CHANGED
@@ -9,6 +9,7 @@ $:.unshift File.expand_path(File.join(File.dirname(__FILE__), "../lib"))
|
|
9
9
|
def print_help
|
10
10
|
puts "
|
11
11
|
Search mathematical reviews sites and fetches metadata about articles.
|
12
|
+
https://github.com/pejuko/math_metadata_lookup
|
12
13
|
|
13
14
|
#{$0} <command> <options>
|
14
15
|
|
@@ -30,7 +31,7 @@ def print_help
|
|
30
31
|
|
31
32
|
|
32
33
|
heuristic -- returns only one best match from each site where
|
33
|
-
similarity is higher
|
34
|
+
similarity is higher then threshold
|
34
35
|
|
35
36
|
--title, -t <title>
|
36
37
|
--author, -a <author> -- repeatable option
|
@@ -60,7 +61,6 @@ def print_help
|
|
60
61
|
end
|
61
62
|
|
62
63
|
require 'pp'
|
63
|
-
require 'yaml'
|
64
64
|
require 'rubygems'
|
65
65
|
require 'math_metadata_lookup'
|
66
66
|
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# -*-: coding: utf-8 -*-
|
2
2
|
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
3
|
|
4
|
+
require 'ya2yaml'
|
5
|
+
|
4
6
|
module MathMetadata
|
5
7
|
|
6
8
|
class Result
|
@@ -33,10 +35,11 @@ module MathMetadata
|
|
33
35
|
def to_html
|
34
36
|
result = ""
|
35
37
|
@metadata.each do |site|
|
38
|
+
next unless site[:result]
|
36
39
|
result << %~
|
37
40
|
<div class="site">
|
38
41
|
<h3>Site: #{site[:name]}</h3>~
|
39
|
-
site[:result].each do |entity|
|
42
|
+
site[:result].to_a.each do |entity|
|
40
43
|
result << entity.to_html
|
41
44
|
end
|
42
45
|
result << %~</div>~
|
@@ -51,6 +54,7 @@ module MathMetadata
|
|
51
54
|
result << %~<?xml version="1.0" encoding="utf-8"?>
|
52
55
|
<mml>~
|
53
56
|
@metadata.each do |site|
|
57
|
+
next unless site[:result]
|
54
58
|
result << %~
|
55
59
|
<site name="#{site[:name]}">~
|
56
60
|
site[:result].each do |entity|
|
@@ -67,7 +71,7 @@ module MathMetadata
|
|
67
71
|
|
68
72
|
|
69
73
|
def to_yaml
|
70
|
-
@metadata.
|
74
|
+
@metadata.ya2yaml
|
71
75
|
end
|
72
76
|
|
73
77
|
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
|
3
3
|
|
4
4
|
require 'unicode'
|
5
|
+
require 'unidecoder'
|
5
6
|
|
6
7
|
module MathMetadata
|
7
8
|
|
@@ -50,7 +51,7 @@ module MathMetadata
|
|
50
51
|
def normalize_name( name )
|
51
52
|
# only latin chars
|
52
53
|
trans = latex_to_utf8(name.to_s)
|
53
|
-
trans =
|
54
|
+
trans = trans.to_ascii
|
54
55
|
|
55
56
|
# remove Jr.
|
56
57
|
trans.sub! %r{\bjr\.(\b|$)}i, ' '
|
@@ -74,8 +75,8 @@ module MathMetadata
|
|
74
75
|
|
75
76
|
|
76
77
|
def normalize_text( s )
|
77
|
-
str = latex_to_utf8(s)
|
78
|
-
str =
|
78
|
+
str = latex_to_utf8(s.to_s)
|
79
|
+
str = str.to_ascii.downcase
|
79
80
|
str = remove_punctuation(str)
|
80
81
|
str.gsub!(%r{\W+}, ' ')
|
81
82
|
str.gsub!(%r{(?:the|a|of|)\s+}i, ' ')
|
@@ -13,13 +13,14 @@ spec = Gem::Specification.new do |s|
|
|
13
13
|
s.email = "pejuko@gmail.com"
|
14
14
|
s.authors = ["Petr Kovar"]
|
15
15
|
s.name = 'math_metadata_lookup'
|
16
|
-
s.version = '0.1.
|
16
|
+
s.version = '0.1.2'
|
17
17
|
s.date = Time.now.strftime("%Y-%m-%d")
|
18
|
-
s.add_dependency('i18n', '>= 0.5.0')
|
19
18
|
s.add_dependency('unicode')
|
19
|
+
s.add_dependency('unidecoder')
|
20
|
+
s.add_dependency('ya2yaml')
|
20
21
|
s.require_path = 'lib'
|
21
22
|
s.files = ["bin/math_metadata_lookup", "README.md", "math_metadata_lookup.gemspec", "TODO", "Rakefile"]
|
22
|
-
s.files += Dir["lib/**/*.rb"]
|
23
|
+
s.files += Dir["lib/**/*.rb", "resources/*"]
|
23
24
|
s.executables = ["math_metadata_lookup"]
|
24
25
|
s.description = <<EOF
|
25
26
|
This utility/library search mathematical reviews sites and fetches metadata about articles.
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 2
|
9
|
+
version: 0.1.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Petr Kovar
|
@@ -14,11 +14,11 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-27 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
21
|
+
name: unicode
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
24
|
none: false
|
@@ -27,13 +27,11 @@ dependencies:
|
|
27
27
|
- !ruby/object:Gem::Version
|
28
28
|
segments:
|
29
29
|
- 0
|
30
|
-
|
31
|
-
- 0
|
32
|
-
version: 0.5.0
|
30
|
+
version: "0"
|
33
31
|
type: :runtime
|
34
32
|
version_requirements: *id001
|
35
33
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
34
|
+
name: unidecoder
|
37
35
|
prerelease: false
|
38
36
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
37
|
none: false
|
@@ -45,6 +43,19 @@ dependencies:
|
|
45
43
|
version: "0"
|
46
44
|
type: :runtime
|
47
45
|
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: ya2yaml
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id003
|
48
59
|
description: |
|
49
60
|
This utility/library search mathematical reviews sites and fetches metadata about articles.
|
50
61
|
It can return results as one of text, xml, html, yaml or ruby formats.
|
@@ -73,6 +84,7 @@ files:
|
|
73
84
|
- lib/math_metadata_lookup/reference.rb
|
74
85
|
- lib/math_metadata_lookup/entity.rb
|
75
86
|
- lib/math_metadata_lookup/author.rb
|
87
|
+
- resources/math_metadata_lookup.js
|
76
88
|
has_rdoc: true
|
77
89
|
homepage: http://github.com/pejuko/math_metadata_lookup
|
78
90
|
licenses: []
|