anystyle-parser 0.6.3 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/anystyle-parser.gemspec +2 -2
- data/lib/anystyle/parser/normalizer.rb +21 -1
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +3 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc9078e1caff3c730c0db06e5ccdf4785b86b7a6
|
4
|
+
data.tar.gz: 9a6aae1bfde176b574c49ceab02b78254fdf3251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3eff36b014282cb5147fc3224c9c86564390ea125a819c62745a13ebfb35d7cd7ca62bddbefbb00bf0597cdf23b596e858c08a2b759e9234decbb8515acbcf46
|
7
|
+
data.tar.gz: 7316bef01b00867ebe5f06a7d6fdba4827bb2b6ea3b7429ec16fdfa6c0fa2492ccd40cb4945b7ce16f90b61cd5bb80e3e80f723a1d7a7a843a1702a203babfde
|
data/README.md
CHANGED
@@ -14,6 +14,11 @@ train the model with data that is relevant to your parsing needs), and
|
|
14
14
|
compatibility (Anystyle-Parser exports to Ruby Hashes, BibTeX, or the
|
15
15
|
CSL/CiteProc JSON format).
|
16
16
|
|
17
|
+
Web Application and Web Service
|
18
|
+
-------------------------------
|
19
|
+
Anystyle-Parser is available as a web application and a web service at
|
20
|
+
[http://anystyle.io](http://anystyle.io).
|
21
|
+
|
17
22
|
Installation
|
18
23
|
------------
|
19
24
|
|
data/anystyle-parser.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.authors = ['Sylvester Keil']
|
12
12
|
s.email = ['http://sylvester.keil.or.at']
|
13
|
-
s.homepage = 'http://
|
13
|
+
s.homepage = 'http://anystyle.io'
|
14
14
|
s.summary = 'Smart and fast academic bibliography parser.'
|
15
15
|
s.description = 'A sophisticated parser for academic reference lists and bibliographies based on machine learning algorithms using conditional random fields.'
|
16
16
|
s.license = 'FreeBSD'
|
@@ -20,7 +20,7 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.add_runtime_dependency('bibtex-ruby', '~>3.0')
|
21
21
|
s.add_runtime_dependency('builder', '>=3.0', '<4.0')
|
22
22
|
s.add_runtime_dependency('wapiti', '~>0.1')
|
23
|
-
s.add_runtime_dependency('namae', '~>0.8', '>=0.8.
|
23
|
+
s.add_runtime_dependency('namae', '~>0.8', '>=0.8.7')
|
24
24
|
|
25
25
|
s.files = `git ls-files`.split("\n").reject { |path|
|
26
26
|
path.start_with?('.')
|
@@ -53,10 +53,30 @@ module Anystyle
|
|
53
53
|
unmatched(key, hash, dangling) unless dangling.empty?
|
54
54
|
|
55
55
|
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
56
|
+
|
56
57
|
hash[key] = token
|
57
58
|
hash
|
58
59
|
end
|
59
60
|
|
61
|
+
def normalize_key(hash)
|
62
|
+
token, *dangling = hash[:key]
|
63
|
+
unmatched(:key, hash, dangling) unless dangling.empty?
|
64
|
+
|
65
|
+
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
66
|
+
token.gsub!(/^bibitem\{/i, '')
|
67
|
+
|
68
|
+
hash[:key] = token
|
69
|
+
hash
|
70
|
+
end
|
71
|
+
|
72
|
+
def normalize_citation_number(hash)
|
73
|
+
token, *dangling = hash[:citation_number]
|
74
|
+
unmatched(:citation_number, hash, dangling) unless dangling.empty?
|
75
|
+
|
76
|
+
hash[:citation_number] = token[/\d+/] || token
|
77
|
+
hash
|
78
|
+
end
|
79
|
+
|
60
80
|
def normalize_author(hash)
|
61
81
|
authors, *dangling = hash[:author]
|
62
82
|
unmatched(:author, hash, dangling) unless dangling.empty?
|
@@ -253,7 +273,7 @@ module Anystyle
|
|
253
273
|
if date =~ /(\d{4})/
|
254
274
|
hash[:year] = $1.to_i
|
255
275
|
|
256
|
-
if hash.key?(:month) && date =~
|
276
|
+
if hash.key?(:month) && date =~ /\b(\d{1,2})\b/
|
257
277
|
hash[:day] = $1.to_i
|
258
278
|
end
|
259
279
|
|
@@ -99,6 +99,7 @@ module Anystyle
|
|
99
99
|
h[:year].should == 2009
|
100
100
|
h[:month].should == 7
|
101
101
|
h.should_not have_key(:date)
|
102
|
+
h.should_not have_key(:day)
|
102
103
|
end
|
103
104
|
|
104
105
|
it 'extracts month and year from a string like "(1997 Sept.)"' do
|
@@ -106,10 +107,12 @@ module Anystyle
|
|
106
107
|
h[:year].should == 1997
|
107
108
|
h[:month].should == 9
|
108
109
|
h.should_not have_key(:date)
|
110
|
+
h.should_not have_key(:day)
|
109
111
|
|
110
112
|
h = Normalizer.instance.normalize_date(:date => '(1997 Okt.)')
|
111
113
|
h[:year].should == 1997
|
112
114
|
h[:month].should == 10
|
115
|
+
h.should_not have_key(:day)
|
113
116
|
end
|
114
117
|
|
115
118
|
it 'extracts days if month and year are present' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
version: '0.8'
|
68
68
|
- - '>='
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: 0.8.
|
70
|
+
version: 0.8.7
|
71
71
|
type: :runtime
|
72
72
|
prerelease: false
|
73
73
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
version: '0.8'
|
78
78
|
- - '>='
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: 0.8.
|
80
|
+
version: 0.8.7
|
81
81
|
description: A sophisticated parser for academic reference lists and bibliographies
|
82
82
|
based on machine learning algorithms using conditional random fields.
|
83
83
|
email:
|
@@ -116,7 +116,7 @@ files:
|
|
116
116
|
- spec/fixtures/train_dps.txt
|
117
117
|
- spec/profile.rb
|
118
118
|
- spec/spec_helper.rb
|
119
|
-
homepage: http://
|
119
|
+
homepage: http://anystyle.io
|
120
120
|
licenses:
|
121
121
|
- FreeBSD
|
122
122
|
metadata: {}
|