anystyle-parser 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/anystyle-parser.gemspec +2 -2
- data/lib/anystyle/parser/normalizer.rb +21 -1
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +3 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc9078e1caff3c730c0db06e5ccdf4785b86b7a6
|
4
|
+
data.tar.gz: 9a6aae1bfde176b574c49ceab02b78254fdf3251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3eff36b014282cb5147fc3224c9c86564390ea125a819c62745a13ebfb35d7cd7ca62bddbefbb00bf0597cdf23b596e858c08a2b759e9234decbb8515acbcf46
|
7
|
+
data.tar.gz: 7316bef01b00867ebe5f06a7d6fdba4827bb2b6ea3b7429ec16fdfa6c0fa2492ccd40cb4945b7ce16f90b61cd5bb80e3e80f723a1d7a7a843a1702a203babfde
|
data/README.md
CHANGED
@@ -14,6 +14,11 @@ train the model with data that is relevant to your parsing needs), and
|
|
14
14
|
compatibility (Anystyle-Parser exports to Ruby Hashes, BibTeX, or the
|
15
15
|
CSL/CiteProc JSON format).
|
16
16
|
|
17
|
+
Web Application and Web Service
|
18
|
+
-------------------------------
|
19
|
+
Anystyle-Parser is available as a web application and a web service at
|
20
|
+
[http://anystyle.io](http://anystyle.io).
|
21
|
+
|
17
22
|
Installation
|
18
23
|
------------
|
19
24
|
|
data/anystyle-parser.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.authors = ['Sylvester Keil']
|
12
12
|
s.email = ['http://sylvester.keil.or.at']
|
13
|
-
s.homepage = 'http://
|
13
|
+
s.homepage = 'http://anystyle.io'
|
14
14
|
s.summary = 'Smart and fast academic bibliography parser.'
|
15
15
|
s.description = 'A sophisticated parser for academic reference lists and bibliographies based on machine learning algorithms using conditional random fields.'
|
16
16
|
s.license = 'FreeBSD'
|
@@ -20,7 +20,7 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.add_runtime_dependency('bibtex-ruby', '~>3.0')
|
21
21
|
s.add_runtime_dependency('builder', '>=3.0', '<4.0')
|
22
22
|
s.add_runtime_dependency('wapiti', '~>0.1')
|
23
|
-
s.add_runtime_dependency('namae', '~>0.8', '>=0.8.
|
23
|
+
s.add_runtime_dependency('namae', '~>0.8', '>=0.8.7')
|
24
24
|
|
25
25
|
s.files = `git ls-files`.split("\n").reject { |path|
|
26
26
|
path.start_with?('.')
|
@@ -53,10 +53,30 @@ module Anystyle
|
|
53
53
|
unmatched(key, hash, dangling) unless dangling.empty?
|
54
54
|
|
55
55
|
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
56
|
+
|
56
57
|
hash[key] = token
|
57
58
|
hash
|
58
59
|
end
|
59
60
|
|
61
|
+
def normalize_key(hash)
|
62
|
+
token, *dangling = hash[:key]
|
63
|
+
unmatched(:key, hash, dangling) unless dangling.empty?
|
64
|
+
|
65
|
+
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
66
|
+
token.gsub!(/^bibitem\{/i, '')
|
67
|
+
|
68
|
+
hash[:key] = token
|
69
|
+
hash
|
70
|
+
end
|
71
|
+
|
72
|
+
def normalize_citation_number(hash)
|
73
|
+
token, *dangling = hash[:citation_number]
|
74
|
+
unmatched(:citation_number, hash, dangling) unless dangling.empty?
|
75
|
+
|
76
|
+
hash[:citation_number] = token[/\d+/] || token
|
77
|
+
hash
|
78
|
+
end
|
79
|
+
|
60
80
|
def normalize_author(hash)
|
61
81
|
authors, *dangling = hash[:author]
|
62
82
|
unmatched(:author, hash, dangling) unless dangling.empty?
|
@@ -253,7 +273,7 @@ module Anystyle
|
|
253
273
|
if date =~ /(\d{4})/
|
254
274
|
hash[:year] = $1.to_i
|
255
275
|
|
256
|
-
if hash.key?(:month) && date =~
|
276
|
+
if hash.key?(:month) && date =~ /\b(\d{1,2})\b/
|
257
277
|
hash[:day] = $1.to_i
|
258
278
|
end
|
259
279
|
|
@@ -99,6 +99,7 @@ module Anystyle
|
|
99
99
|
h[:year].should == 2009
|
100
100
|
h[:month].should == 7
|
101
101
|
h.should_not have_key(:date)
|
102
|
+
h.should_not have_key(:day)
|
102
103
|
end
|
103
104
|
|
104
105
|
it 'extracts month and year from a string like "(1997 Sept.)"' do
|
@@ -106,10 +107,12 @@ module Anystyle
|
|
106
107
|
h[:year].should == 1997
|
107
108
|
h[:month].should == 9
|
108
109
|
h.should_not have_key(:date)
|
110
|
+
h.should_not have_key(:day)
|
109
111
|
|
110
112
|
h = Normalizer.instance.normalize_date(:date => '(1997 Okt.)')
|
111
113
|
h[:year].should == 1997
|
112
114
|
h[:month].should == 10
|
115
|
+
h.should_not have_key(:day)
|
113
116
|
end
|
114
117
|
|
115
118
|
it 'extracts days if month and year are present' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.3
|
4
|
+
version: 0.6.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
version: '0.8'
|
68
68
|
- - '>='
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: 0.8.
|
70
|
+
version: 0.8.7
|
71
71
|
type: :runtime
|
72
72
|
prerelease: false
|
73
73
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
version: '0.8'
|
78
78
|
- - '>='
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: 0.8.
|
80
|
+
version: 0.8.7
|
81
81
|
description: A sophisticated parser for academic reference lists and bibliographies
|
82
82
|
based on machine learning algorithms using conditional random fields.
|
83
83
|
email:
|
@@ -116,7 +116,7 @@ files:
|
|
116
116
|
- spec/fixtures/train_dps.txt
|
117
117
|
- spec/profile.rb
|
118
118
|
- spec/spec_helper.rb
|
119
|
-
homepage: http://
|
119
|
+
homepage: http://anystyle.io
|
120
120
|
licenses:
|
121
121
|
- FreeBSD
|
122
122
|
metadata: {}
|