anystyle-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +0 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/Gemfile +21 -0
- data/HISTORY.md +3 -0
- data/LICENSE +26 -0
- data/README.md +152 -0
- data/anystyle-parser.gemspec +37 -0
- data/features/step_definitions/parser_steps.rb +0 -0
- data/features/support/env.rb +1 -0
- data/lib/anystyle/parser/dictionary.rb +165 -0
- data/lib/anystyle/parser/errors.rb +19 -0
- data/lib/anystyle/parser/features.rb +164 -0
- data/lib/anystyle/parser/normalizer.rb +322 -0
- data/lib/anystyle/parser/parser.rb +240 -0
- data/lib/anystyle/parser/support/anystyle.mod +7891 -0
- data/lib/anystyle/parser/support/anystyle.pat +72 -0
- data/lib/anystyle/parser/support/dict.txt.gz +0 -0
- data/lib/anystyle/parser/utility.rb +19 -0
- data/lib/anystyle/parser/version.rb +5 -0
- data/lib/anystyle/parser.rb +17 -0
- data/spec/anystyle/parser/dictionary_spec.rb +31 -0
- data/spec/anystyle/parser/features_spec.rb +24 -0
- data/spec/anystyle/parser/normalizer_spec.rb +36 -0
- data/spec/anystyle/parser/parser_spec.rb +85 -0
- data/spec/benchmark.rb +74 -0
- data/spec/profile.rb +34 -0
- data/spec/spec_helper.rb +1 -0
- metadata +169 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
# Feature numbers
|
2
|
+
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
3
|
+
# England a E En Eng Engl d nd and land england initial none 20 no-male no-female surname no-month place no-publisher no-journal none 0 others other
|
4
|
+
|
5
|
+
u:%x[-3,0]
|
6
|
+
u:%x[-2,0]
|
7
|
+
u:%x[-1,0]
|
8
|
+
u:%x[0,0]
|
9
|
+
u:%x[1,0]
|
10
|
+
u:%x[2,0]
|
11
|
+
u:%x[3,0]
|
12
|
+
u:%x[-1,0]/%x[0,0]
|
13
|
+
u:%x[0,0]/%x[1,0]
|
14
|
+
|
15
|
+
# last character type
|
16
|
+
u:%x[0,1]
|
17
|
+
u:%x[-1,1]
|
18
|
+
|
19
|
+
# first 1-4 characters
|
20
|
+
u:%x[0,2]
|
21
|
+
u:%x[0,3]
|
22
|
+
u:%x[0,4]
|
23
|
+
u:%x[0,5]
|
24
|
+
|
25
|
+
# last 1-4 characters
|
26
|
+
u:%x[0,6]
|
27
|
+
u:%x[0,7]
|
28
|
+
u:%x[0,8]
|
29
|
+
u:%x[0,9]
|
30
|
+
|
31
|
+
# no punctuation lower-case
|
32
|
+
u:%x[-2,10]
|
33
|
+
u:%x[-1,10]
|
34
|
+
u:%x[0,10]
|
35
|
+
u:%x[1,10]
|
36
|
+
u:%x[2,10]
|
37
|
+
|
38
|
+
# capitalization
|
39
|
+
u:%x[0,11]
|
40
|
+
|
41
|
+
# numbers
|
42
|
+
u:%x[-1,12]
|
43
|
+
u:%x[0,12]
|
44
|
+
u:%x[1,12]
|
45
|
+
u:%x[-1,12]/%x[0,12]
|
46
|
+
u:%x[0,12]/%x[1,12]
|
47
|
+
|
48
|
+
# dictionary
|
49
|
+
u:%x[0,13]
|
50
|
+
u:%x[0,14]
|
51
|
+
u:%x[0,15]
|
52
|
+
u:%x[0,16]
|
53
|
+
u:%x[0,17]
|
54
|
+
u:%x[0,18]
|
55
|
+
u:%x[0,19]
|
56
|
+
u:%x[0,20]
|
57
|
+
|
58
|
+
# possible editor
|
59
|
+
u:%x[0,21]
|
60
|
+
|
61
|
+
# position
|
62
|
+
u:%x[0,22]
|
63
|
+
|
64
|
+
# punctuation
|
65
|
+
u:%x[0,23]
|
66
|
+
|
67
|
+
# possible chapter
|
68
|
+
u:%x[-1,23]/%x[0,24]/%x[0,21]
|
69
|
+
u:%x[-1,23]/%x[0,24]/%x[1,11]
|
70
|
+
|
71
|
+
# bigram
|
72
|
+
b
|
Binary file
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
require 'singleton'
|
3
|
+
|
4
|
+
require 'bibtex'
|
5
|
+
require 'wapiti'
|
6
|
+
|
7
|
+
# require 'ruby-debug'
|
8
|
+
# Debugger.start
|
9
|
+
|
10
|
+
require 'anystyle/parser/errors'
|
11
|
+
|
12
|
+
require 'anystyle/parser/dictionary'
|
13
|
+
require 'anystyle/parser/features'
|
14
|
+
require 'anystyle/parser/parser'
|
15
|
+
require 'anystyle/parser/normalizer'
|
16
|
+
|
17
|
+
require 'anystyle/parser/utility'
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Anystyle
|
4
|
+
module Parser
|
5
|
+
|
6
|
+
describe "Dictionary" do
|
7
|
+
|
8
|
+
let(:dict) { Dictionary.instance }
|
9
|
+
|
10
|
+
it { Dictionary.should_not respond_to(:new) }
|
11
|
+
it { dict.should_not be nil }
|
12
|
+
|
13
|
+
|
14
|
+
describe "the dictionary" do
|
15
|
+
|
16
|
+
%w{ philippines italy }.each do |place|
|
17
|
+
it "#{place.inspect} should be a place name" do
|
18
|
+
dict[place].should == Dictionary.code[:place]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it "accepts unicode strings like 'çela' (surname)" do
|
23
|
+
(dict['çela'] & Dictionary.code[:surname]).should > 0
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
module Anystyle::Parser
|
4
|
+
describe "Features" do
|
5
|
+
|
6
|
+
describe "numbers" do
|
7
|
+
let(:f) { Parser.feature[:numbers] }
|
8
|
+
|
9
|
+
%w{ (1992) 1992 2011 1776 }.each do |year|
|
10
|
+
it "returns :year for #{year.inspect}" do
|
11
|
+
f.match(year).should == :year
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
%w{ (1) (12) (123) }.each do |year|
|
16
|
+
it "returns :numeric for #{year.inspect}" do
|
17
|
+
f.match(year).should == :numeric
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Anystyle
|
2
|
+
module Parser
|
3
|
+
|
4
|
+
describe "Normalizer" do
|
5
|
+
|
6
|
+
describe "#tokenize_names" do
|
7
|
+
|
8
|
+
it "tokenizes 'A B'" do
|
9
|
+
Normalizer.instance.tokenize_names('A B').should == ['A B']
|
10
|
+
end
|
11
|
+
|
12
|
+
it "tokenizes 'A, B'" do
|
13
|
+
Normalizer.instance.tokenize_names('A, B').should == ['A, B']
|
14
|
+
end
|
15
|
+
|
16
|
+
it "tokenizes 'A, jr., B'" do
|
17
|
+
Normalizer.instance.tokenize_names('A, jr., B').should == ['A, jr., B']
|
18
|
+
end
|
19
|
+
|
20
|
+
it "tokenizes 'A, B, jr.'" do
|
21
|
+
Normalizer.instance.tokenize_names('A, B, jr.').should == ['A, B, jr.']
|
22
|
+
end
|
23
|
+
|
24
|
+
it "tokenizes 'A, B, C, D'" do
|
25
|
+
Normalizer.instance.tokenize_names('A, B, C, D').should == ['A, B', ' C, D']
|
26
|
+
end
|
27
|
+
|
28
|
+
it "tokenizes 'A, B, C'" do
|
29
|
+
Normalizer.instance.tokenize_names('A, B, C').should == ['A, B', ' C']
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Anystyle::Parser
|
2
|
+
describe Parser do
|
3
|
+
|
4
|
+
it { should_not be nil }
|
5
|
+
|
6
|
+
describe "#tokenize" do
|
7
|
+
it "returns [] when given an empty string" do
|
8
|
+
subject.tokenize('').should == []
|
9
|
+
end
|
10
|
+
|
11
|
+
it "takes a single line and returns an array of token sequences" do
|
12
|
+
subject.tokenize('hello, world!').should == [%w{ hello, world! }]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "takes two lines and returns an array of token sequences" do
|
16
|
+
subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
|
17
|
+
end
|
18
|
+
|
19
|
+
context "when passing a string marked as tagged" do
|
20
|
+
it "returns [] when given an empty string" do
|
21
|
+
subject.tokenize('', true).should == []
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns an array of :unknown token sequences when given an untagged single line" do
|
25
|
+
subject.tokenize('hello, world!', true).should == [[['hello,', :unknown], ['world!', :unknown]]]
|
26
|
+
end
|
27
|
+
|
28
|
+
it "returns an array of :unknown token sequences when given two untagged lines" do
|
29
|
+
subject.tokenize("hello,\nworld!", true).should == [[['hello,', :unknown]], [['world!', :unknown]]]
|
30
|
+
end
|
31
|
+
|
32
|
+
it "returns an array of token/tag pair for each line when given a single tagged string" do
|
33
|
+
subject.tokenize('<a>hello</a>', true).should == [[['hello', :a]]]
|
34
|
+
end
|
35
|
+
|
36
|
+
it "returns an array of token/tag pair for each line when given a string with multiple tags" do
|
37
|
+
subject.tokenize('<a>hello world</a> <b> !</b>', true).should == [[['hello',:a], ['world', :a], ['!', :b]]]
|
38
|
+
end
|
39
|
+
|
40
|
+
it "raises an argument error if the string contains mismatched tags" do
|
41
|
+
expect { subject.tokenize('<a> hello </b>', true) }.to raise_error(ArgumentError)
|
42
|
+
expect { subject.tokenize('<a> hello <b> world </a>', true) }.to raise_error(ArgumentError)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
describe "#prepare" do
|
49
|
+
it 'returns an array of expanded token sequences' do
|
50
|
+
subject.prepare('hello, world!').should == [['hello, , h he hel hell , o, lo, llo, hello other none 0 no-male no-female no-surname no-month no-place no-publisher no-journal no-editors 0 internal other', 'world! ! w wo wor worl ! d! ld! rld! world other none 36 no-male no-female surname no-month no-place publisher no-journal no-editors 5 terminal other']]
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'when marking the input as being tagged' do
|
54
|
+
let(:input) { %{<author> A. Cau, R. Kuiper, and W.-P. de Roever. </author> <title> Formalising Dijkstra's development strategy within Stark's formalism. </title> <editor> In C. B. Jones, R. C. Shaw, and T. Denvir, editors, </editor> <booktitle> Proc. 5th. BCS-FACS Refinement Workshop, </booktitle> <date> 1992. </date>} }
|
55
|
+
|
56
|
+
it 'returns an array of expanded and labelled token sequences for a tagged string' do
|
57
|
+
subject.prepare(input, true)[0].map { |t| t[/\S+$/] }.should == %w{ author author author author author author author author title title title title title title title editor editor editor editor editor editor editor editor editor editor editor booktitle booktitle booktitle booktitle booktitle date }
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns an array of expanded and labelled :unknown token sequences for an untagged input' do
|
61
|
+
subject.prepare('hello, world!', true)[0].map { |t| t[/\S+$/] }.should == %w{ unknown unknown }
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "#label" do
|
68
|
+
let(:citation) { 'Perec, Georges. A Void. London: The Harvill Press, 1995. p.108.' }
|
69
|
+
|
70
|
+
it 'returns an array of labelled segments' do
|
71
|
+
subject.label(citation)[0].map(&:first).should == [:author, :title, :location, :publisher, :date, :pages]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe "#parse" do
|
76
|
+
let(:citation) { 'Perec, Georges. A Void. London: The Harvill Press, 1995. p.108.' }
|
77
|
+
|
78
|
+
it 'returns a hash of label/segment pairs by default' do
|
79
|
+
subject.parse(citation)[0].should == { :author => 'Perec, Georges', :title => 'A Void', :location => 'London', :publisher => 'The Harvill Press', :year => 1995, :pages => '108', :type => :book }
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
end
|
85
|
+
end
|
data/spec/benchmark.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'anystyle/parser'
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
include Benchmark
|
5
|
+
|
6
|
+
data = <<-END_REFERENCES
|
7
|
+
<author> A. Cau, R. Kuiper, and W.-P. de Roever. </author> <title> Formalising Dijkstra's development strategy within Stark's formalism. </title> <editor> In C. B. Jones, R. C. Shaw, and T. Denvir, editors, </editor> <booktitle> Proc. 5th. BCS-FACS Refinement Workshop, </booktitle> <date> 1992. </date>
|
8
|
+
<author> M. Kitsuregawa, H. Tanaka, and T. Moto-oka. </author> <title> Application of hash to data base machine and its architecture. </title> <journal> New Generation Computing, </journal> <volume> 1(1), </volume> <date> 1983. </date>
|
9
|
+
<author> Alexander Vrchoticky. </author> <title> Modula/R language definition. </title> <tech> Technical Report TU Wien rr-02-92, version 2.0, </tech> <institution> Dept. for Real-Time Systems, Technical University of Vienna, </institution> <date> May 1993. </date>
|
10
|
+
<author> Marc Shapiro and Susan Horwitz. </author> <title> Fast and accurate flow-insensitive points-to analysis. </title> <booktitle> In Proceedings of the 24th Annual ACM Symposium on Principles of Programming Languages, </booktitle> <date> January 1997. </date>
|
11
|
+
<author> W. Landi and B. G. Ryder. </author> <title> Aliasing with and without pointers: A problem taxonomy. </title> <institution> Center for Computer Aids for Industrial Productivity </institution> <tech> Technical Report CAIP-TR-125, </tech> <institution> Rutgers University, </institution> <date> September 1990. </date>
|
12
|
+
<author> W. H. Enright. </author> <title> Improving the efficiency of matrix operations in the numerical solution of stiff ordinary differential equations. </title> <journal> ACM Trans. Math. Softw., </journal> <volume> 4(2), </volume> <pages> 127-136, </pages> <date> June 1978. </date>
|
13
|
+
<author> Gmytrasiewicz, P. J., Durfee, E. H., & Wehe, D. K. </author> <date> (1991a). </date> <title> A decision theoretic approach to coordinating multiagent interaction. </title> <booktitle> In Proceedings of the Twelfth International Joint Conference on Artificial Intelligence, </booktitle> <pages> pp. 62-68 </pages> <location> Sydney, Australia. </location>
|
14
|
+
<author> A. Bookstein and S. T. Klein, </author> <title> Detecting content-bearing words by serial clustering, </title> <booktitle> Proceedings of the Nineteenth Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, </booktitle> <pages> pp. 319327, </pages> <date> 1995. </date>
|
15
|
+
<author> U. Dayal, H. Garcia-Molina, M. Hsu, B. Kao, and M.- C. Shan. </author> <title> Third generation TP monitors: A database challenge. </title> <booktitle> In ACM SIGMOD Conference on Management of Data, </booktitle> <pages> pages 393-397, </pages> <location> Washington, D. C., </location> <date> May 1993. </date>
|
16
|
+
<author> C. Qiao and R. Melhem, </author> <title> "Reducing Communication Latency with Path Multiplexing in Optically Interconnected Multiprocessor Systems", </title> <booktitle> Proc. of HPCA-1, </booktitle> <date> 1995. </date>
|
17
|
+
END_REFERENCES
|
18
|
+
|
19
|
+
data = data * 100
|
20
|
+
|
21
|
+
data = data.split("\n")
|
22
|
+
|
23
|
+
Anystyle::Parser::Feature.load_dictionary
|
24
|
+
parser = Anystyle::Parser::Parser.instance
|
25
|
+
|
26
|
+
n, k = 100, 5
|
27
|
+
|
28
|
+
f = []
|
29
|
+
g = []
|
30
|
+
|
31
|
+
Benchmark.benchmark((" "*15) + CAPTION, 7, FMTSTR, '%14s:' % 'sum(f)', '%14s:' % 'sum(g)') do |b|
|
32
|
+
1.step(n,k) do |i|
|
33
|
+
|
34
|
+
input = data[0,i]
|
35
|
+
f << b.report('%14s:' % "f(#{i})") do
|
36
|
+
input.each { |line| parser.prepare(line, true) }
|
37
|
+
end
|
38
|
+
|
39
|
+
input = input.join("\n")
|
40
|
+
g << b.report('%14s:' % "g(#{i})") do
|
41
|
+
parser.prepare(input, true)
|
42
|
+
end
|
43
|
+
|
44
|
+
[f.reduce(:+), g.reduce(:+)]
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
require 'gnuplot'
|
50
|
+
|
51
|
+
f = f.map(&:total)
|
52
|
+
g = g.map(&:total)
|
53
|
+
|
54
|
+
x = 1.step(n,k).to_a
|
55
|
+
|
56
|
+
Gnuplot.open do |gp|
|
57
|
+
Gnuplot::Plot.new(gp) do |plot|
|
58
|
+
plot.title 'Anystyle Parser Benchmark'
|
59
|
+
plot.ylabel 't'
|
60
|
+
plot.xlabel 'n'
|
61
|
+
|
62
|
+
plot.data << Gnuplot::DataSet.new([x,f]) do |ds|
|
63
|
+
ds.with = 'linespoints'
|
64
|
+
ds.title = 'f'
|
65
|
+
end
|
66
|
+
|
67
|
+
plot.data << Gnuplot::DataSet.new([x,g]) do |ds|
|
68
|
+
ds.with = 'linespoints'
|
69
|
+
ds.title = 'g'
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
data/spec/profile.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'anystyle/parser'
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
require 'ruby-prof'
|
5
|
+
|
6
|
+
data = <<-END_REFERENCES
|
7
|
+
<author> A. Cau, R. Kuiper, and W.-P. de Roever. </author> <title> Formalising Dijkstra's development strategy within Stark's formalism. </title> <editor> In C. B. Jones, R. C. Shaw, and T. Denvir, editors, </editor> <booktitle> Proc. 5th. BCS-FACS Refinement Workshop, </booktitle> <date> 1992. </date>
|
8
|
+
<author> M. Kitsuregawa, H. Tanaka, and T. Moto-oka. </author> <title> Application of hash to data base machine and its architecture. </title> <journal> New Generation Computing, </journal> <volume> 1(1), </volume> <date> 1983. </date>
|
9
|
+
<author> Alexander Vrchoticky. </author> <title> Modula/R language definition. </title> <tech> Technical Report TU Wien rr-02-92, version 2.0, </tech> <institution> Dept. for Real-Time Systems, Technical University of Vienna, </institution> <date> May 1993. </date>
|
10
|
+
<author> Marc Shapiro and Susan Horwitz. </author> <title> Fast and accurate flow-insensitive points-to analysis. </title> <booktitle> In Proceedings of the 24th Annual ACM Symposium on Principles of Programming Languages, </booktitle> <date> January 1997. </date>
|
11
|
+
<author> W. Landi and B. G. Ryder. </author> <title> Aliasing with and without pointers: A problem taxonomy. </title> <institution> Center for Computer Aids for Industrial Productivity </institution> <tech> Technical Report CAIP-TR-125, </tech> <institution> Rutgers University, </institution> <date> September 1990. </date>
|
12
|
+
<author> W. H. Enright. </author> <title> Improving the efficiency of matrix operations in the numerical solution of stiff ordinary differential equations. </title> <journal> ACM Trans. Math. Softw., </journal> <volume> 4(2), </volume> <pages> 127-136, </pages> <date> June 1978. </date>
|
13
|
+
<author> Gmytrasiewicz, P. J., Durfee, E. H., & Wehe, D. K. </author> <date> (1991a). </date> <title> A decision theoretic approach to coordinating multiagent interaction. </title> <booktitle> In Proceedings of the Twelfth International Joint Conference on Artificial Intelligence, </booktitle> <pages> pp. 62-68 </pages> <location> Sydney, Australia. </location>
|
14
|
+
<author> A. Bookstein and S. T. Klein, </author> <title> Detecting content-bearing words by serial clustering, </title> <booktitle> Proceedings of the Nineteenth Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, </booktitle> <pages> pp. 319327, </pages> <date> 1995. </date>
|
15
|
+
<author> U. Dayal, H. Garcia-Molina, M. Hsu, B. Kao, and M.- C. Shan. </author> <title> Third generation TP monitors: A database challenge. </title> <booktitle> In ACM SIGMOD Conference on Management of Data, </booktitle> <pages> pages 393-397, </pages> <location> Washington, D. C., </location> <date> May 1993. </date>
|
16
|
+
<author> C. Qiao and R. Melhem, </author> <title> "Reducing Communication Latency with Path Multiplexing in Optically Interconnected Multiprocessor Systems", </title> <booktitle> Proc. of HPCA-1, </booktitle> <date> 1995. </date>
|
17
|
+
END_REFERENCES
|
18
|
+
|
19
|
+
|
20
|
+
Anystyle::Parser::Feature.load_dictionary
|
21
|
+
parser = Anystyle::Parser::Parser.instance
|
22
|
+
|
23
|
+
|
24
|
+
result = RubyProf.profile do
|
25
|
+
parser.prepare(data, true)
|
26
|
+
end
|
27
|
+
|
28
|
+
dot = Tempfile.new('dot')
|
29
|
+
RubyProf::DotPrinter.new(result).print(dot, :min_percent => 5)
|
30
|
+
dot.close
|
31
|
+
|
32
|
+
system "dot -Tpng -oprofile.png #{dot.path}"
|
33
|
+
|
34
|
+
# dot.unlink
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'anystyle/parser'
|
metadata
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: anystyle-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Sylvester Keil
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-09-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bibtex-ruby
|
16
|
+
requirement: &2152150220 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.3'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2152150220
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: wapiti
|
27
|
+
requirement: &2152110260 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0.0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2152110260
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rake
|
38
|
+
requirement: &2152107240 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0.9'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *2152107240
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: racc
|
49
|
+
requirement: &2152096280 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.4'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2152096280
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: cucumber
|
60
|
+
requirement: &2152093880 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '1.0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *2152093880
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: &2152091560 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '2.6'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *2152091560
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: ZenTest
|
82
|
+
requirement: &2152088380 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '4.6'
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *2152088380
|
91
|
+
description: A sophisticated parser for academic references based on conditional random
|
92
|
+
fields.
|
93
|
+
email:
|
94
|
+
- http://sylvester.keil.or.at
|
95
|
+
executables: []
|
96
|
+
extensions: []
|
97
|
+
extra_rdoc_files:
|
98
|
+
- README.md
|
99
|
+
- LICENSE
|
100
|
+
files:
|
101
|
+
- .autotest
|
102
|
+
- .gitignore
|
103
|
+
- .rspec
|
104
|
+
- Gemfile
|
105
|
+
- HISTORY.md
|
106
|
+
- LICENSE
|
107
|
+
- README.md
|
108
|
+
- anystyle-parser.gemspec
|
109
|
+
- features/step_definitions/parser_steps.rb
|
110
|
+
- features/support/env.rb
|
111
|
+
- lib/anystyle/parser.rb
|
112
|
+
- lib/anystyle/parser/dictionary.rb
|
113
|
+
- lib/anystyle/parser/errors.rb
|
114
|
+
- lib/anystyle/parser/features.rb
|
115
|
+
- lib/anystyle/parser/normalizer.rb
|
116
|
+
- lib/anystyle/parser/parser.rb
|
117
|
+
- lib/anystyle/parser/support/anystyle.mod
|
118
|
+
- lib/anystyle/parser/support/anystyle.pat
|
119
|
+
- lib/anystyle/parser/support/dict.txt.gz
|
120
|
+
- lib/anystyle/parser/utility.rb
|
121
|
+
- lib/anystyle/parser/version.rb
|
122
|
+
- spec/anystyle/parser/dictionary_spec.rb
|
123
|
+
- spec/anystyle/parser/features_spec.rb
|
124
|
+
- spec/anystyle/parser/normalizer_spec.rb
|
125
|
+
- spec/anystyle/parser/parser_spec.rb
|
126
|
+
- spec/benchmark.rb
|
127
|
+
- spec/profile.rb
|
128
|
+
- spec/spec_helper.rb
|
129
|
+
homepage: http://inukshuk.github.com/anystyle-parser
|
130
|
+
licenses:
|
131
|
+
- FreeBSD
|
132
|
+
post_install_message:
|
133
|
+
rdoc_options:
|
134
|
+
- --line-numbers
|
135
|
+
- --inline-source
|
136
|
+
- --title
|
137
|
+
- ! '"Anystyle Parser"'
|
138
|
+
- --main
|
139
|
+
- README.md
|
140
|
+
require_paths:
|
141
|
+
- lib
|
142
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
143
|
+
none: false
|
144
|
+
requirements:
|
145
|
+
- - ! '>='
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: '0'
|
148
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
none: false
|
150
|
+
requirements:
|
151
|
+
- - ! '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 1.8.10
|
157
|
+
signing_key:
|
158
|
+
specification_version: 3
|
159
|
+
summary: Parser for academic references.
|
160
|
+
test_files:
|
161
|
+
- features/step_definitions/parser_steps.rb
|
162
|
+
- features/support/env.rb
|
163
|
+
- spec/anystyle/parser/dictionary_spec.rb
|
164
|
+
- spec/anystyle/parser/features_spec.rb
|
165
|
+
- spec/anystyle/parser/normalizer_spec.rb
|
166
|
+
- spec/anystyle/parser/parser_spec.rb
|
167
|
+
- spec/benchmark.rb
|
168
|
+
- spec/profile.rb
|
169
|
+
- spec/spec_helper.rb
|