stanfordparser-infochimps 2.2.1.s
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +6 -0
- data/LICENSE +18 -0
- data/README.orig.rdoc +123 -0
- data/README.rdoc +37 -0
- data/Rakefile +60 -0
- data/TESTS_STATUS.rdoc +2 -0
- data/VERSION.yml +5 -0
- data/examples/stanford-sentence-parser.rb +46 -0
- data/lib/stanfordparser.rb +453 -0
- data/lib/stanfordparser/java_object.rb +129 -0
- data/stanfordparser.gemspec +69 -0
- data/test/test_stanfordparser.rb +224 -0
- metadata +207 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
# Copyright 2007-2008 William Patrick McNeill
|
2
|
+
#
|
3
|
+
# This file is part of the Stanford Parser Ruby Wrapper.
|
4
|
+
#
|
5
|
+
# The Stanford Parser Ruby Wrapper is free software; you can redistribute it
|
6
|
+
# and/or modify it under the terms of the GNU General Public License as
|
7
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
8
|
+
# or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# The Stanford Parser Ruby Wrapper is distributed in the hope that it will be
|
11
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
13
|
+
# Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License along with
|
16
|
+
# the Stanford Parser Ruby Wrapper; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
17
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
# Extensions to the {Ruby-Java Bridge}[http://rjb.rubyforge.org/] module that
|
20
|
+
# add a generic Java object wrapper class.
|
21
|
+
module Rjb
|
22
|
+
|
23
|
+
#--
|
24
|
+
# The documentation for this class appears next to its extension inside the
|
25
|
+
# StanfordParser module in stanfordparser.rb. This should be changed if Rjb
|
26
|
+
# is ever moved into its own gem. See the documentation in stanfordparser.rb
|
27
|
+
# for more details.
|
28
|
+
#++
|
29
|
+
class JavaObjectWrapper
  include Enumerable

  # The underlying Java object.
  attr_reader :java_object

  # Initialize with a Java object <em>obj</em>. If <em>obj</em> is a
  # String, treat it as a Java class name, import it through Rjb and
  # instantiate it with <em>args</em> as constructor arguments. Otherwise,
  # treat <em>obj</em> as an instance of a Java object and wrap it as is
  # (<em>args</em> is ignored in that case).
  def initialize(obj, *args)
    @java_object = obj.is_a?(String) ? Rjb::import(obj).send(:new, *args) : obj
  end

  # Enumerate all the items in the object using its iterator, yielding each
  # one after conversion by wrap_java_object. If the object has no method
  # named "iterator", this function yields nothing.
  #
  # Returns an enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    return unless @java_object.getClass.getMethods.any? { |m| m.getName == "iterator" }

    iterator = @java_object.iterator
    yield wrap_java_object(iterator.next) while iterator.hasNext
  end

  # Reflect unhandled method calls to the underlying Java object and wrap
  # the return value in the appropriate Ruby object.
  #
  # If the call fails with a RuntimeError whose message starts with
  # "Fail: unknown method name", the call is retried on the result of
  # getClass so that static methods can be reached. Any other RuntimeError
  # is re-raised to the caller instead of being swallowed.
  def method_missing(m, *args)
    wrap_java_object(@java_object.send(m, *args))
  rescue RuntimeError => e
    # Only an unknown instance method justifies the static-method fallback;
    # every other failure is a genuine error the caller needs to see.
    raise unless e.message =~ /^Fail: unknown method name/

    getClass.send(m, *args)
  end

  # Convert a value returned by a call to the underlying Java object to the
  # appropriate Ruby object.
  #
  # If the value responds to _classname (as Ruby-Java Bridge objects do),
  # convert it using a protected function with the name wrap_ followed by
  # the underlying object's classname with the Java path delimiters
  # converted to underscores. For example, a <tt>java.util.ArrayList</tt>
  # would be converted by a function called wrap_java_util_ArrayList.
  #
  # If the value lacks the appropriate converter function, wrap it in a
  # generic JavaObjectWrapper.
  #
  # If the value does not respond to _classname, return it unchanged.
  #
  # This function is called recursively for every element in an Array.
  def wrap_java_object(object)
    if object.kind_of?(Array)
      object.collect { |item| wrap_java_object(item) }
    elsif object.respond_to?(:_classname)
      # Ruby-Java Bridge Java objects all have a _classname member which
      # tells the name of their Java class. Convert this to the
      # corresponding wrapper function name.
      wrapper_name = "wrap_#{object._classname.tr('.', '_')}".to_sym
      # The converters are protected, and respond_to? only reports
      # non-public methods when its second argument is true.
      if respond_to?(wrapper_name, true)
        send(wrapper_name, object)
      else
        JavaObjectWrapper.new(object)
      end
    else
      object
    end
  end

  # Convert <tt>java.util.ArrayList</tt> objects to Ruby Array objects,
  # converting every element with wrap_java_object.
  def wrap_java_util_ArrayList(object)
    Array.new(object.size) { |i| wrap_java_object(object.get(i)) }
  end

  # Convert <tt>java.util.HashSet</tt> objects to Ruby Set objects,
  # converting every element with wrap_java_object.
  def wrap_java_util_HashSet(object)
    set = Set.new
    iterator = object.iterator
    set << wrap_java_object(iterator.next) while iterator.hasNext
    set
  end

  # Show the classname of the underlying Java object.
  def inspect
    "<#{@java_object._classname}>"
  end

  # Use the underlying Java object's stringification (its toString method,
  # reached through method_missing).
  def to_s
    toString
  end

  protected :wrap_java_object, :wrap_java_util_ArrayList, :wrap_java_util_HashSet

end # JavaObjectWrapper
|
128
|
+
|
129
|
+
end # Rjb
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for stanfordparser 2.2.1.s.
#
# NOTE(review): the header of this file says it is generated by jeweler, so
# manual changes here may be overwritten by the gemspec rake task -- confirm
# before relying on them.
Gem::Specification.new do |s|
  s.name = %q{stanfordparser}
  s.version = "2.2.1.s"

  s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
  s.authors = ["John Wilkinson", "Bill McNeal"]
  s.date = %q{2010-06-21}
  s.description = %q{Ruby wrapper of the Stanford Parser, a NLP parser built in Java.}
  s.email = %q{jcwilk@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.orig",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.orig",
    "README.rdoc",
    "Rakefile",
    "VERSION.yml",
    "examples/connection_finder.rb",
    "examples/stanford-sentence-parser.rb",
    "lib/stanfordparser.rb",
    "lib/stanfordparser/java_object.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "spec/stanfordparser_spec.rb",
    "test/test_stanfordparser.rb"
  ]
  s.homepage = %q{http://github.com/jcwilk/stanfordparser}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{GitHub upload/extension of Bill McNeal's stanfordparser rubygem}
  s.test_files = [
    "spec/spec_helper.rb",
    "spec/stanfordparser_spec.rb",
    "test/test_stanfordparser.rb",
    "examples/connection_finder.rb",
    "examples/stanford-sentence-parser.rb"
  ]

  # Each dependency is declared once; the branches below only choose which
  # RubyGems API registers it.
  runtime_dependencies = [
    [%q<rjb>, ">= 1.2.5"],
    [%q<treebank>, ">= 3.0.0"]
  ]
  development_dependencies = [
    [%q<rspec>, ">= 1.2.9"]
  ]

  # Typed (runtime/development) dependencies are used only when the
  # specification supports specification_version and RubyGems is >= 1.2.0.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    runtime_dependencies.each do |name, requirement|
      s.add_runtime_dependency(name, [requirement])
    end
    development_dependencies.each do |name, requirement|
      s.add_development_dependency(name, [requirement])
    end
  else
    # Fallback API: every dependency, rspec included, is registered through
    # add_dependency.
    (runtime_dependencies + development_dependencies).each do |name, requirement|
      s.add_dependency(name, [requirement])
    end
  end
end
|
69
|
+
|
@@ -0,0 +1,224 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
|
5
|
+
# Copyright 2007-2008 William Patrick McNeill
|
6
|
+
#
|
7
|
+
# This file is part of the Stanford Parser Ruby Wrapper.
|
8
|
+
#
|
9
|
+
# The Stanford Parser Ruby Wrapper is free software; you can redistribute it
|
10
|
+
# and/or modify it under the terms of the GNU General Public License as
|
11
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
12
|
+
# or (at your option) any later version.
|
13
|
+
#
|
14
|
+
# The Stanford Parser Ruby Wrapper is distributed in the hope that it will be
|
15
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
17
|
+
# Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License along with
|
20
|
+
# the Stanford Parser Ruby Wrapper; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
21
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
22
|
+
#
|
23
|
+
#++
|
24
|
+
|
25
|
+
# Test cases for the Stanford Parser module
|
26
|
+
|
27
|
+
require "test/unit"
|
28
|
+
require "set"
|
29
|
+
require "singleton"
|
30
|
+
require "stanfordparser"
|
31
|
+
|
32
|
+
|
33
|
+
# Tests for the default lexicalized parser and the tree it produces.
class LexicalizedParserTestCase < Test::Unit::TestCase
  # Parse one sentence with the shared default parser before every test.
  def setup
    @parser = StanfordParser::DefaultParser.instance
    @tree = @parser.apply("This is a sentence.")
  end

  # StanfordParser::ROOT is a Pathname.
  def test_root_path
    assert_equal Pathname, StanfordParser::ROOT.class
  end

  # The parser reports the English PCFG grammar under ROOT and applying it
  # yields a StanfordParser::Tree.
  def test_parser
    assert_equal StanfordParser::ROOT + "englishPCFG.ser.gz", @parser.grammar
    assert_equal StanfordParser::Tree, @tree.class
  end

  def test_localTrees
    # The following call exercises the conversion from java.util.HashSet
    # objects to Ruby sets.
    l = @tree.localTrees
    assert_equal 5, l.size
    # The two NP labels collapse into one Set member on both sides.
    assert_equal Set.new(["S", "NP", "VP", "ROOT", "NP"]),
                 Set.new(l.collect { |t| "#{t.label}" })
  end

  def test_enumerable
    # StanfordParser::LexicalizedParser is not an enumerable object, so
    # enumerating it produces no items. to_a is used because map without a
    # block returns an Enumerator on Ruby 1.9+, which never equals [].
    assert_equal [], @parser.to_a
  end
end # LexicalizedParserTestCase
|
62
|
+
|
63
|
+
|
64
|
+
# Tests enumeration over the nodes of a parsed tree.
class TreeTestCase < Test::Unit::TestCase
  # Parse one sentence with the shared default parser before every test.
  def setup
    @parser = StanfordParser::DefaultParser.instance
    @tree = @parser.apply("This is a sentence.")
  end

  # Every enumerated item is a StanfordParser::Tree wrapping a labeled scored
  # tree node or leaf, and the labels come out in the order listed below.
  def test_enumerable
    assert @tree.all? { |n| n.class == StanfordParser::Tree }
    assert @tree.all? { |n|
      n._classname == "edu.stanford.nlp.trees.LabeledScoredTreeNode" ||
        n._classname == "edu.stanford.nlp.trees.LabeledScoredTreeLeaf"
    }
    expected_labels = [
      "ROOT", "S", "NP", "DT", "This", "VP", "VBZ", "is", "NP", "DT", "a",
      "NN", "sentence", ".", "."
    ]
    assert_equal expected_labels, @tree.map { |n| "#{n.label}" }
  end
end # TreeTestCase
|
81
|
+
|
82
|
+
|
83
|
+
# Tests the key constants and stringification of StanfordParser::FeatureLabel.
class FeatureLabelTestCase < Test::Unit::TestCase
  # For each key accessor: check the key's string value, then store a value
  # under it. Finally check both string forms of the populated label.
  def test_feature_label
    label = StanfordParser::FeatureLabel.new
    [
      [:BEGIN_POSITION_KEY, "BEGIN_POS", 3],
      [:END_POSITION_KEY, "END_POS", 7],
      [:CURRENT_KEY, "current", "word"]
    ].each do |accessor, expected_key, value|
      assert_equal expected_key, label.send(accessor)
      label.put(label.send(accessor), value)
    end
    assert_equal "{BEGIN_POS=3, END_POS=7, current=word}", label.inspect
    assert_equal "word [3,7]", label.to_s
  end
end
|
96
|
+
|
97
|
+
|
98
|
+
# Tests for the plain and standoff document preprocessors.
class DocumentPreprocessorTestCase < Test::Unit::TestCase
  # Expected tokens as [current, word, begin_position, end_position].
  FIRST_SENTENCE_TOKENS = [
    ["He", "He", 0, 2],
    ["(", "-LRB-", 3, 4],
    ["John", "John", 4, 8],
    [")", "-RRB-", 8, 9],
    ["is", "is", 10, 12],
    ["tall", "tall", 13, 17],
    [".", ".", 17, 18]
  ].freeze

  SECOND_SENTENCE_TOKENS = [
    ["So", "So", 20, 22],
    ["is", "is", 23, 25],
    ["she", "she", 26, 29],
    [".", ".", 29, 30]
  ].freeze

  # Create one preprocessor of each kind before every test.
  def setup
    @preproc = StanfordParser::DocumentPreprocessor.new
    @standoff_preproc = StanfordParser::StandoffDocumentPreprocessor.new
  end

  def test_get_sentences_from_string
    # The following call exercises the conversion from java.util.ArrayList
    # objects to Ruby arrays.
    s = @preproc.getSentencesFromString("This is a sentence. So is this.")
    assert_equal "This is a sentence .", "#{s[0]}"
    assert_equal "So is this .", "#{s[1]}"
  end

  def test_enumerable
    # StanfordParser::DocumentPreprocessor is not an enumerable object, so
    # enumerating it produces no items. to_a is used because map without a
    # block returns an Enumerator on Ruby 1.9+, which never equals [].
    assert_equal [], @preproc.to_a
  end

  # Segment and tokenize text containing two sentences.
  def test_standoff_document_preprocessor
    # NOTE(review): the expected offsets (first sentence ends at 18, second
    # starts at 20) imply two spaces between the sentences; the literal below
    # has one. Confirm the input text against the original test file.
    sentences = @standoff_preproc.getSentencesFromString("He (John) is tall. So is she.")
    # Recognize two sentences.
    assert_equal 2, sentences.length
    assert sentences.all? { |sentence| sentence.instance_of? StanfordParser::StandoffSentence }
    assert_equal "He (John) is tall.", sentences.first.to_s
    assert_equal 7, sentences.first.length
    assert sentences[0].all? { |token| token.instance_of? StanfordParser::StandoffToken }
    assert_equal "So is she.", sentences.last.to_s
    assert_equal 4, sentences.last.length
    assert sentences[1].all? { |token| token.instance_of? StanfordParser::StandoffToken }
    # Get the correct token information for the first sentence.
    assert_tokens FIRST_SENTENCE_TOKENS, sentences[0]
    # Get the correct token information for the second sentence.
    assert_tokens SECOND_SENTENCE_TOKENS, sentences[1]
  end

  def test_stringification
    assert_equal "<DocumentPreprocessor>", @preproc.inspect
    assert_equal "<DocumentPreprocessor>", @preproc.to_s
    assert_equal "<StandoffDocumentPreprocessor>", @standoff_preproc.inspect
    assert_equal "<StandoffDocumentPreprocessor>", @standoff_preproc.to_s
  end

  private

  # Assert that tokens[i] has the text and character offsets listed in
  # expected[i], where each entry is [current, word, begin, end].
  def assert_tokens(expected, tokens)
    expected.each_with_index do |(current, word, first, last), i|
      assert_equal [current, word], [tokens[i].current, tokens[i].word]
      assert_equal [first, last], [tokens[i].begin_position, tokens[i].end_position]
    end
  end

end # DocumentPreprocessorTestCase
|
163
|
+
|
164
|
+
|
165
|
+
# Tests parsing of a two-sentence text into standoff-annotated trees.
class StandoffParsedTextTestCase < Test::Unit::TestCase
  # Expected leaf labels as [current, word, begin_position, end_position],
  # one list per sentence.
  EXPECTED_LEAVES = [
    [
      ["He", "He", 0, 2],
      ["(", "-LRB-", 3, 4],
      ["John", "John", 4, 8],
      [")", "-RRB-", 8, 9],
      ["is", "is", 10, 12],
      ["tall", "tall", 13, 17],
      [".", ".", 17, 18]
    ],
    [
      ["So", "So", 20, 22],
      ["is", "is", 23, 25],
      ["she", "she", 26, 29],
      [".", ".", 29, 30]
    ]
  ].freeze

  def setup
    # NOTE(review): the expected offsets (first sentence ends at 18, second
    # starts at 20) imply two spaces between the sentences; the literal below
    # and the expected strings in verify_parsed_text have one. Confirm
    # against the original test file.
    @text = "He (John) is tall. So is she."
  end

  def test_parse_text_default_nodetype
    parsed_text = StanfordParser::StandoffParsedText.new(@text)
    verify_parsed_text(parsed_text, StanfordParser::StandoffNode)
  end

  # Verify correct parsing with variable node types for text containing two sentences.
  def verify_parsed_text(parsed_text, nodetype)
    # Verify that there are two sentences.
    assert_equal 2, parsed_text.length
    assert parsed_text.all? { |sentence| sentence.instance_of? nodetype }
    # Verify the tokens in the leaf nodes of the first, then the second sentence.
    EXPECTED_LEAVES.each_with_index do |expected_tokens, sentence_index|
      leaves = parsed_text[sentence_index].leaves.collect { |node| node.label }
      expected_tokens.each_with_index do |(current, word, first, last), i|
        assert_equal [current, word], [leaves[i].current, leaves[i].word]
        assert_equal [first, last], [leaves[i].begin_position, leaves[i].end_position]
      end
    end
    # Verify that the original string is recoverable.
    assert_equal "He (John) is tall. ", parsed_text[0].to_original_string
    assert_equal "So is she.", parsed_text[1].to_original_string
    # Draw < and > brackets around 3 constituents.
    constituents = [[0, 0], [0, 0, 1, 1], [0, 1, 1]]
    bracketed = parsed_text[0].to_bracketed_string(constituents, "<", ">")
    assert_equal "<He (<John>)> is <tall>. ", bracketed
  end
end
|
214
|
+
|
215
|
+
|
216
|
+
# Miscellaneous checks: the model-location constant and Word comparison.
class MiscPreprocessorTestCase < Test::Unit::TestCase
  # The English PCFG model constant holds the unexpanded $(ROOT) template.
  def test_model_location
    expected_location = "$(ROOT)/englishPCFG.ser.gz"
    assert_equal expected_location, StanfordParser::ENGLISH_PCFG_MODEL
  end

  # A Word built from a Java class name and a string compares equal (with
  # the Word on the left-hand side of ==) to that string.
  def test_word
    word = StanfordParser::Word.new("edu.stanford.nlp.ling.Word", "dog")
    assert word == "dog"
  end
end # MiscPreprocessorTestCase
|
metadata
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stanfordparser-infochimps
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 12
|
5
|
+
prerelease: 6
|
6
|
+
segments:
|
7
|
+
- 2
|
8
|
+
- 2
|
9
|
+
- 1
|
10
|
+
- s
|
11
|
+
version: 2.2.1.s
|
12
|
+
platform: ruby
|
13
|
+
authors:
|
14
|
+
- John Wilkinson
|
15
|
+
- Bill McNeal
|
16
|
+
autorequire:
|
17
|
+
bindir: bin
|
18
|
+
cert_chain: []
|
19
|
+
|
20
|
+
date: 2011-01-27 00:00:00 -06:00
|
21
|
+
default_executable:
|
22
|
+
dependencies:
|
23
|
+
- !ruby/object:Gem::Dependency
|
24
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
requirement: *id001
|
34
|
+
prerelease: false
|
35
|
+
name: rjb
|
36
|
+
type: :runtime
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
requirement: *id002
|
48
|
+
prerelease: false
|
49
|
+
name: rake
|
50
|
+
type: :runtime
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
requirement: *id003
|
62
|
+
prerelease: false
|
63
|
+
name: jeweler
|
64
|
+
type: :runtime
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 7
|
72
|
+
segments:
|
73
|
+
- 3
|
74
|
+
- 0
|
75
|
+
- 0
|
76
|
+
version: 3.0.0
|
77
|
+
requirement: *id004
|
78
|
+
prerelease: false
|
79
|
+
name: treebank
|
80
|
+
type: :runtime
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 13
|
88
|
+
segments:
|
89
|
+
- 1
|
90
|
+
- 2
|
91
|
+
- 9
|
92
|
+
version: 1.2.9
|
93
|
+
requirement: *id005
|
94
|
+
prerelease: false
|
95
|
+
name: rspec
|
96
|
+
type: :runtime
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 21
|
104
|
+
segments:
|
105
|
+
- 1
|
106
|
+
- 2
|
107
|
+
- 5
|
108
|
+
version: 1.2.5
|
109
|
+
requirement: *id006
|
110
|
+
prerelease: false
|
111
|
+
name: rjb
|
112
|
+
type: :runtime
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
hash: 7
|
120
|
+
segments:
|
121
|
+
- 3
|
122
|
+
- 0
|
123
|
+
- 0
|
124
|
+
version: 3.0.0
|
125
|
+
requirement: *id007
|
126
|
+
prerelease: false
|
127
|
+
name: treebank
|
128
|
+
type: :runtime
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
hash: 13
|
136
|
+
segments:
|
137
|
+
- 1
|
138
|
+
- 2
|
139
|
+
- 9
|
140
|
+
version: 1.2.9
|
141
|
+
requirement: *id008
|
142
|
+
prerelease: false
|
143
|
+
name: rspec
|
144
|
+
type: :development
|
145
|
+
description: Ruby wrapper of the Stanford Parser, a NLP parser built in Java.
|
146
|
+
email: jcwilk@gmail.com
|
147
|
+
executables: []
|
148
|
+
|
149
|
+
extensions: []
|
150
|
+
|
151
|
+
extra_rdoc_files:
|
152
|
+
- LICENSE
|
153
|
+
- README.orig.rdoc
|
154
|
+
- README.rdoc
|
155
|
+
files:
|
156
|
+
- .document
|
157
|
+
- Gemfile
|
158
|
+
- LICENSE
|
159
|
+
- README.orig.rdoc
|
160
|
+
- README.rdoc
|
161
|
+
- Rakefile
|
162
|
+
- TESTS_STATUS.rdoc
|
163
|
+
- VERSION.yml
|
164
|
+
- examples/stanford-sentence-parser.rb
|
165
|
+
- lib/stanfordparser.rb
|
166
|
+
- lib/stanfordparser/java_object.rb
|
167
|
+
- stanfordparser.gemspec
|
168
|
+
- test/test_stanfordparser.rb
|
169
|
+
has_rdoc: true
|
170
|
+
homepage: http://github.com/jcwilk/stanfordparser
|
171
|
+
licenses: []
|
172
|
+
|
173
|
+
post_install_message:
|
174
|
+
rdoc_options: []
|
175
|
+
|
176
|
+
require_paths:
|
177
|
+
- lib
|
178
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
179
|
+
none: false
|
180
|
+
requirements:
|
181
|
+
- - ">="
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
hash: 3
|
184
|
+
segments:
|
185
|
+
- 0
|
186
|
+
version: "0"
|
187
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
188
|
+
none: false
|
189
|
+
requirements:
|
190
|
+
- - ">"
|
191
|
+
- !ruby/object:Gem::Version
|
192
|
+
hash: 25
|
193
|
+
segments:
|
194
|
+
- 1
|
195
|
+
- 3
|
196
|
+
- 1
|
197
|
+
version: 1.3.1
|
198
|
+
requirements: []
|
199
|
+
|
200
|
+
rubyforge_project:
|
201
|
+
rubygems_version: 1.4.2
|
202
|
+
signing_key:
|
203
|
+
specification_version: 3
|
204
|
+
summary: GitHub upload/extension of Bill McNeal's stanfordparser rubygem
|
205
|
+
test_files:
|
206
|
+
- examples/stanford-sentence-parser.rb
|
207
|
+
- test/test_stanfordparser.rb
|