anystyle-parser 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ 0.0.10 / 2012-03-01
2
+ ===================
3
+ * Added new output format: tags (to generate training data)
4
+
1
5
  0.0.9 / 2011-09-08
2
6
  ==================
3
7
  * Added year-range / page-range feature distinction
data/README.md CHANGED
@@ -41,19 +41,19 @@ Usage
41
41
 
42
42
  You can access the main Anystyle-Parser instance at `Anystyle.parser`;
43
43
  the `#parse` method is also available via `Anystyle.parse`. For more complex
44
- requirements (e.g., if you need multiple Parser simultaneously) you can create
45
- your own instances from the `Anystyle::Parser::Parser` class.
44
+ requirements (e.g., if you need multiple Parser instances simultaneously) you
45
+ can create your own instances from the `Anystyle::Parser::Parser` class.
46
46
 
47
47
  The two fundamental methods you need to know about in order to use
48
48
  Anystyle-Parser are `#parse` and `#train`, both of which accept two arguments.
49
49
 
50
50
  Parser#parse(input, format = :hash)
51
- Parser#train(input, truncate = false)
51
+ Parser#train(input = options[:training_data], truncate = true)
52
52
 
53
53
  `#parse` parses the passed-in input (either a filename, your reference strings,
54
54
  or an array of your reference strings) and returns the parsed data in the
55
55
  format specified as the second argument (supported formats include: *:hash*,
56
- *:bibtex*, and *:citeproc*).
56
+ *:bibtex*, *:citeproc*, and *:tags*).
57
57
 
58
58
  `#train` allows you to easily train the Parser's CRF model. The first argument
59
59
  is either a filename or your data as a string; the format of training data
@@ -3,7 +3,7 @@ module Anystyle
3
3
 
4
4
  class Parser
5
5
 
6
- @formats = [:bibtex, :hash, :citeproc].freeze
6
+ @formats = [:bibtex, :hash, :citeproc, :tags].freeze
7
7
 
8
8
  @defaults = {
9
9
  :model => File.expand_path('../support/anystyle.mod', __FILE__),
@@ -233,7 +233,13 @@ module Anystyle
233
233
  def format_citeproc(labels)
234
234
  format_bibtex(labels).to_citeproc
235
235
  end
236
-
236
+
237
+ def format_tags(labels)
238
+ labels.map do |line|
239
+ line.map { |label, token| "<#{label}>#{token}</#{label}>" }.join(' ')
240
+ end
241
+ end
242
+
237
243
  end
238
244
 
239
245
  end
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.0.9'.freeze
3
+ VERSION = '0.0.10'.freeze
4
4
  end
5
5
  end
@@ -70,6 +70,35 @@ module Anystyle::Parser
70
70
  it 'returns an array of labelled segments' do
71
71
  subject.label(citation)[0].map(&:first).should == [:author, :title, :location, :publisher, :date, :pages]
72
72
  end
73
+
74
+ describe 'when passed more than one line' do
75
+ it 'returns two arrays' do
76
+ subject.label("foo\nbar").should have(2).elements
77
+ end
78
+ end
79
+
80
+ describe 'when passed invalid input' do
81
+ it 'returns an empty array for an empty string' do
82
+ subject.label('').should == []
83
+ end
84
+
85
+ it 'returns an empty array for an empty line' do
86
+ subject.label("\n").should == []
87
+ subject.label("\n ").should == [[],[]]
88
+ subject.label(" \n ").should == [[],[]]
89
+ subject.label(" \n").should == [[]]
90
+ end
91
+
92
+ it 'does not fail for unrecognizable input' do
93
+ lambda { subject.label("@misc{70213094902020,\n") }.should_not raise_error
94
+ lambda { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.should_not raise_error
95
+
96
+ pending
97
+ lambda { subject.label("\n doi ") }.should_not raise_error
98
+ end
99
+ end
100
+
101
+
73
102
  end
74
103
 
75
104
  describe "#parse" do
@@ -78,6 +107,12 @@ module Anystyle::Parser
78
107
  it 'returns a hash of label/segment pairs by default' do
79
108
  subject.parse(citation)[0].should == { :author => 'Perec, Georges', :title => 'A Void', :location => 'London', :publisher => 'The Harvill Press', :year => 1995, :pages => '108', :type => :book }
80
109
  end
110
+
111
+ describe 'using output format "tags"' do
112
+ it 'returns a tagged string' do
113
+ subject.parse(citation, :tags)[0].should == '<author>Perec, Georges.</author> <title>A Void.</title> <location>London:</location> <publisher>The Harvill Press,</publisher> <date>1995.</date> <pages>p.108.</pages>'
114
+ end
115
+ end
81
116
  end
82
117
 
83
118
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bibtex-ruby
16
- requirement: &2157133020 !ruby/object:Gem::Requirement
16
+ requirement: &70338916773120 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157133020
24
+ version_requirements: *70338916773120
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: wapiti
27
- requirement: &2157132440 !ruby/object:Gem::Requirement
27
+ requirement: &70338916772120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2157132440
35
+ version_requirements: *70338916772120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake
38
- requirement: &2157131620 !ruby/object:Gem::Requirement
38
+ requirement: &70338916770100 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.9'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157131620
46
+ version_requirements: *70338916770100
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: racc
49
- requirement: &2157131060 !ruby/object:Gem::Requirement
49
+ requirement: &70338916769520 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2157131060
57
+ version_requirements: *70338916769520
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &2157130540 !ruby/object:Gem::Requirement
60
+ requirement: &70338916768880 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2157130540
68
+ version_requirements: *70338916768880
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2157129900 !ruby/object:Gem::Requirement
71
+ requirement: &70338916768220 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '2.6'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2157129900
79
+ version_requirements: *70338916768220
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: ZenTest
82
- requirement: &2157129240 !ruby/object:Gem::Requirement
82
+ requirement: &70338916767680 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '4.6'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2157129240
90
+ version_requirements: *70338916767680
91
91
  description: A sophisticated parser for academic references based on conditional random
92
92
  fields.
93
93
  email:
@@ -146,12 +146,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  - - ! '>='
147
147
  - !ruby/object:Gem::Version
148
148
  version: '0'
149
+ segments:
150
+ - 0
151
+ hash: -640366899922045737
149
152
  required_rubygems_version: !ruby/object:Gem::Requirement
150
153
  none: false
151
154
  requirements:
152
155
  - - ! '>='
153
156
  - !ruby/object:Gem::Version
154
157
  version: '0'
158
+ segments:
159
+ - 0
160
+ hash: -640366899922045737
155
161
  requirements: []
156
162
  rubyforge_project:
157
163
  rubygems_version: 1.8.10