anystyle-parser 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ 0.0.10 / 2012-03-01
2
+ ===================
3
+ * Added new output format: tags (to generate training data)
4
+
1
5
  0.0.9 / 2011-09-08
2
6
  ==================
3
7
  * Added year-range / page-range feature distinction
data/README.md CHANGED
@@ -41,19 +41,19 @@ Usage
41
41
 
42
42
  You can access the main Anystyle-Parser instance at `Anystyle.parser`;
43
43
  the `#parse` method is also available via `Anystyle.parse`. For more complex
44
- requirements (e.g., if you need multiple Parser simultaneously) you can create
45
- your own instances from the `Anystyle::Parser::Parser` class.
44
+ requirements (e.g., if you need multiple Parser instances simultaneously) you
45
+ can create your own instances from the `Anystyle::Parser::Parser` class.
46
46
 
47
47
  The two fundamental methods you need to know about in order to use
48
48
  Anystyle-Parser are `#parse` and `#train` that both accept two arguments.
49
49
 
50
50
  Parser#parse(input, format = :hash)
51
- Parser#train(input, truncate = false)
51
+ Parser#train(input = options[:training_data], truncate = true)
52
52
 
53
53
  `#parse` parses the passed-in input (either a filename, your reference strings,
54
54
  or an array of your reference strings) and returns the parsed data in the
55
55
  format specified as the second argument (supported formats include: *:hash*,
56
- *:bibtex*, and *:citeproc*).
56
+ *:bibtex*, *:citeproc*, and *:tags*).
57
57
 
58
58
  `#train` allows you to easily train the Parser's CRF model. The first argument
59
59
  is either a filename or your data as a string; the format of training data
@@ -3,7 +3,7 @@ module Anystyle
3
3
 
4
4
  class Parser
5
5
 
6
- @formats = [:bibtex, :hash, :citeproc].freeze
6
+ @formats = [:bibtex, :hash, :citeproc, :tags].freeze
7
7
 
8
8
  @defaults = {
9
9
  :model => File.expand_path('../support/anystyle.mod', __FILE__),
@@ -233,7 +233,13 @@ module Anystyle
233
233
  def format_citeproc(labels)
234
234
  format_bibtex(labels).to_citeproc
235
235
  end
236
-
236
+
237
+ def format_tags(labels)
238
+ labels.map do |line|
239
+ line.map { |label, token| "<#{label}>#{token}</#{label}>" }.join(' ')
240
+ end
241
+ end
242
+
237
243
  end
238
244
 
239
245
  end
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.0.9'.freeze
3
+ VERSION = '0.0.10'.freeze
4
4
  end
5
5
  end
@@ -70,6 +70,35 @@ module Anystyle::Parser
70
70
  it 'returns an array of labelled segments' do
71
71
  subject.label(citation)[0].map(&:first).should == [:author, :title, :location, :publisher, :date, :pages]
72
72
  end
73
+
74
+ describe 'when passed more than one line' do
75
+ it 'returns two arrays' do
76
+ subject.label("foo\nbar").should have(2).elements
77
+ end
78
+ end
79
+
80
+ describe 'when passed invalid input' do
81
+ it 'returns an empty array for an empty string' do
82
+ subject.label('').should == []
83
+ end
84
+
85
+ it 'returns an empty array for an empty line' do
86
+ subject.label("\n").should == []
87
+ subject.label("\n ").should == [[],[]]
88
+ subject.label(" \n ").should == [[],[]]
89
+ subject.label(" \n").should == [[]]
90
+ end
91
+
92
+ it 'does not fail for unrecognizable input' do
93
+ lambda { subject.label("@misc{70213094902020,\n") }.should_not raise_error
94
+ lambda { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.should_not raise_error
95
+
96
+ pending
97
+ lambda { subject.label("\n doi ") }.should_not raise_error
98
+ end
99
+ end
100
+
101
+
73
102
  end
74
103
 
75
104
  describe "#parse" do
@@ -78,6 +107,12 @@ module Anystyle::Parser
78
107
  it 'returns a hash of label/segment pairs by default' do
79
108
  subject.parse(citation)[0].should == { :author => 'Perec, Georges', :title => 'A Void', :location => 'London', :publisher => 'The Harvill Press', :year => 1995, :pages => '108', :type => :book }
80
109
  end
110
+
111
+ describe 'using output format "tags"' do
112
+ it 'returns a tagged string' do
113
+ subject.parse(citation, :tags)[0].should == '<author>Perec, Georges.</author> <title>A Void.</title> <location>London:</location> <publisher>The Harvill Press,</publisher> <date>1995.</date> <pages>p.108.</pages>'
114
+ end
115
+ end
81
116
  end
82
117
 
83
118
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bibtex-ruby
16
- requirement: &2157133020 !ruby/object:Gem::Requirement
16
+ requirement: &70338916773120 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157133020
24
+ version_requirements: *70338916773120
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: wapiti
27
- requirement: &2157132440 !ruby/object:Gem::Requirement
27
+ requirement: &70338916772120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2157132440
35
+ version_requirements: *70338916772120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake
38
- requirement: &2157131620 !ruby/object:Gem::Requirement
38
+ requirement: &70338916770100 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.9'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157131620
46
+ version_requirements: *70338916770100
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: racc
49
- requirement: &2157131060 !ruby/object:Gem::Requirement
49
+ requirement: &70338916769520 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2157131060
57
+ version_requirements: *70338916769520
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &2157130540 !ruby/object:Gem::Requirement
60
+ requirement: &70338916768880 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '1.0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2157130540
68
+ version_requirements: *70338916768880
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &2157129900 !ruby/object:Gem::Requirement
71
+ requirement: &70338916768220 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '2.6'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2157129900
79
+ version_requirements: *70338916768220
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: ZenTest
82
- requirement: &2157129240 !ruby/object:Gem::Requirement
82
+ requirement: &70338916767680 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '4.6'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2157129240
90
+ version_requirements: *70338916767680
91
91
  description: A sophisticated parser for academic references based on conditional random
92
92
  fields.
93
93
  email:
@@ -146,12 +146,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  - - ! '>='
147
147
  - !ruby/object:Gem::Version
148
148
  version: '0'
149
+ segments:
150
+ - 0
151
+ hash: -640366899922045737
149
152
  required_rubygems_version: !ruby/object:Gem::Requirement
150
153
  none: false
151
154
  requirements:
152
155
  - - ! '>='
153
156
  - !ruby/object:Gem::Version
154
157
  version: '0'
158
+ segments:
159
+ - 0
160
+ hash: -640366899922045737
155
161
  requirements: []
156
162
  rubyforge_project:
157
163
  rubygems_version: 1.8.10