anystyle-parser 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.md +4 -0
- data/README.md +4 -4
- data/lib/anystyle/parser/parser.rb +8 -2
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/parser_spec.rb +35 -0
- metadata +22 -16
data/HISTORY.md
CHANGED
data/README.md
CHANGED
@@ -41,19 +41,19 @@ Usage
|
|
41
41
|
|
42
42
|
You can access the main Anystyle-Parser instance at `Anystyle.parser`;
|
43
43
|
the `#parse` method is also available via `Anystyle.parse`. For more complex
|
44
|
-
requirements (e.g., if you need multiple Parser simultaneously) you
|
45
|
-
your own instances from the `Anystyle::Parser::Parser` class.
|
44
|
+
requirements (e.g., if you need multiple Parser instances simultaneously) you
|
45
|
+
can create your own instances from the `Anystyle::Parser::Parser` class.
|
46
46
|
|
47
47
|
The two fundamental methods you need to know about in order to use
|
48
48
|
Anystyle-Parser are `#parse` and `#train` that both accept two arguments.
|
49
49
|
|
50
50
|
Parser#parse(input, format = :hash)
|
51
|
-
Parser#train(input, truncate =
|
51
|
+
Parser#train(input = options[:training_data], truncate = true)
|
52
52
|
|
53
53
|
`#parse` parses the passed-in input (either a filename, your reference strings,
|
54
54
|
or an array of your reference strings) and returns the parsed data in the
|
55
55
|
format specified as the second argument (supported formats include: *:hash*,
|
56
|
-
*:bibtex*, and *:citeproc*).
|
56
|
+
*:bibtex*, *:citeproc*, and *:tags*).
|
57
57
|
|
58
58
|
`#train` allows you to easily train the Parser's CRF model. The first argument
|
59
59
|
is either a filename or your data as a string; the format of training data
|
@@ -3,7 +3,7 @@ module Anystyle
|
|
3
3
|
|
4
4
|
class Parser
|
5
5
|
|
6
|
-
@formats = [:bibtex, :hash, :citeproc].freeze
|
6
|
+
@formats = [:bibtex, :hash, :citeproc, :tags].freeze
|
7
7
|
|
8
8
|
@defaults = {
|
9
9
|
:model => File.expand_path('../support/anystyle.mod', __FILE__),
|
@@ -233,7 +233,13 @@ module Anystyle
|
|
233
233
|
def format_citeproc(labels)
|
234
234
|
format_bibtex(labels).to_citeproc
|
235
235
|
end
|
236
|
-
|
236
|
+
|
237
|
+
def format_tags(labels)
|
238
|
+
labels.map do |line|
|
239
|
+
line.map { |label, token| "<#{label}>#{token}</#{label}>" }.join(' ')
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
237
243
|
end
|
238
244
|
|
239
245
|
end
|
@@ -70,6 +70,35 @@ module Anystyle::Parser
|
|
70
70
|
it 'returns an array of labelled segments' do
|
71
71
|
subject.label(citation)[0].map(&:first).should == [:author, :title, :location, :publisher, :date, :pages]
|
72
72
|
end
|
73
|
+
|
74
|
+
describe 'when passed more than one line' do
|
75
|
+
it 'returns two arrays' do
|
76
|
+
subject.label("foo\nbar").should have(2).elements
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe 'when passed invalid input' do
|
81
|
+
it 'returns an empty array for an empty string' do
|
82
|
+
subject.label('').should == []
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'returns an empty array for an empty line' do
|
86
|
+
subject.label("\n").should == []
|
87
|
+
subject.label("\n ").should == [[],[]]
|
88
|
+
subject.label(" \n ").should == [[],[]]
|
89
|
+
subject.label(" \n").should == [[]]
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'does not fail for unrecognizable input' do
|
93
|
+
lambda { subject.label("@misc{70213094902020,\n") }.should_not raise_error
|
94
|
+
lambda { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.should_not raise_error
|
95
|
+
|
96
|
+
pending
|
97
|
+
lambda { subject.label("\n doi ") }.should_not raise_error
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
73
102
|
end
|
74
103
|
|
75
104
|
describe "#parse" do
|
@@ -78,6 +107,12 @@ module Anystyle::Parser
|
|
78
107
|
it 'returns a hash of label/segment pairs by default' do
|
79
108
|
subject.parse(citation)[0].should == { :author => 'Perec, Georges', :title => 'A Void', :location => 'London', :publisher => 'The Harvill Press', :year => 1995, :pages => '108', :type => :book }
|
80
109
|
end
|
110
|
+
|
111
|
+
describe 'using output format "tags"' do
|
112
|
+
it 'returns a tagged string' do
|
113
|
+
subject.parse(citation, :tags)[0].should == '<author>Perec, Georges.</author> <title>A Void.</title> <location>London:</location> <publisher>The Harvill Press,</publisher> <date>1995.</date> <pages>p.108.</pages>'
|
114
|
+
end
|
115
|
+
end
|
81
116
|
end
|
82
117
|
|
83
118
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-03-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bibtex-ruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &70338916773120 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.3'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70338916773120
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: wapiti
|
27
|
-
requirement: &
|
27
|
+
requirement: &70338916772120 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70338916772120
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rake
|
38
|
-
requirement: &
|
38
|
+
requirement: &70338916770100 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.9'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70338916770100
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: racc
|
49
|
-
requirement: &
|
49
|
+
requirement: &70338916769520 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '1.4'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70338916769520
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: cucumber
|
60
|
-
requirement: &
|
60
|
+
requirement: &70338916768880 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70338916768880
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &70338916768220 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '2.6'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70338916768220
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: ZenTest
|
82
|
-
requirement: &
|
82
|
+
requirement: &70338916767680 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '4.6'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70338916767680
|
91
91
|
description: A sophisticated parser for academic references based on conditional random
|
92
92
|
fields.
|
93
93
|
email:
|
@@ -146,12 +146,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
- - ! '>='
|
147
147
|
- !ruby/object:Gem::Version
|
148
148
|
version: '0'
|
149
|
+
segments:
|
150
|
+
- 0
|
151
|
+
hash: -640366899922045737
|
149
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
153
|
none: false
|
151
154
|
requirements:
|
152
155
|
- - ! '>='
|
153
156
|
- !ruby/object:Gem::Version
|
154
157
|
version: '0'
|
158
|
+
segments:
|
159
|
+
- 0
|
160
|
+
hash: -640366899922045737
|
155
161
|
requirements: []
|
156
162
|
rubyforge_project:
|
157
163
|
rubygems_version: 1.8.10
|