stanfordparser-infochimps 2.2.1.s
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +6 -0
- data/LICENSE +18 -0
- data/README.orig.rdoc +123 -0
- data/README.rdoc +37 -0
- data/Rakefile +60 -0
- data/TESTS_STATUS.rdoc +2 -0
- data/VERSION.yml +5 -0
- data/examples/stanford-sentence-parser.rb +46 -0
- data/lib/stanfordparser.rb +453 -0
- data/lib/stanfordparser/java_object.rb +129 -0
- data/stanfordparser.gemspec +69 -0
- data/test/test_stanfordparser.rb +224 -0
- metadata +207 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
# Copyright 2007-2008 William Patrick McNeill
|
2
|
+
#
|
3
|
+
# This file is part of the Stanford Parser Ruby Wrapper.
|
4
|
+
#
|
5
|
+
# The Stanford Parser Ruby Wrapper is free software; you can redistribute it
|
6
|
+
# and/or modify it under the terms of the GNU General Public License as
|
7
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
8
|
+
# or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# The Stanford Parser Ruby Wrapper is distributed in the hope that it will be
|
11
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
13
|
+
# Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License along with
|
16
|
+
# editalign; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
17
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
require "set"

# Extensions to the {Ruby-Java Bridge}[http://rjb.rubyforge.org/] module that
# add a generic Java object wrapper class.
module Rjb

  #--
  # The documentation for this class appears next to its extension inside the
  # StanfordParser module in stanfordparser.rb.  This should be changed if Rjb
  # is ever moved into its own gem.  See the documentation in stanfordparser.rb
  # for more details.
  #++
  class JavaObjectWrapper
    include Enumerable

    # The underlying Java object.
    attr_reader :java_object

    # Initialize with a Java object <em>obj</em>.  If <em>obj</em> is a
    # String, treat it as a Java class name, import it and instantiate it
    # with <em>args</em>.  Otherwise, treat <em>obj</em> as an instance of a
    # Java object and store it as is.
    def initialize(obj, *args)
      @java_object = obj.is_a?(String) ? Rjb::import(obj).send(:new, *args) : obj
    end

    # Enumerate all the items in the object using its iterator.  If the object
    # has no iterator, this function yields nothing.  Called without a block,
    # it returns an Enumerator instead of failing on the first yield.
    def each
      return enum_for(:each) unless block_given?
      # Use Java reflection to find out whether the object is iterable at all.
      return unless @java_object.getClass.getMethods.any? { |m| m.getName == "iterator" }

      i = @java_object.iterator
      yield wrap_java_object(i.next) while i.hasNext
    end # each

    # Reflect unhandled method calls to the underlying Java object and wrap
    # the return value in the appropriate Ruby object.
    #
    # If the call raises a RuntimeError whose message starts with
    # "Fail: unknown method name", the call is retried on the object returned
    # by getClass, on the assumption that it names a static member.  Any other
    # RuntimeError is re-raised rather than being swallowed and turned into a
    # nil return value.
    def method_missing(m, *args)
      wrap_java_object(@java_object.send(m, *args))
    rescue RuntimeError => e
      raise unless e.message =~ /^Fail: unknown method name/
      # The instance method failed.  See if this is a static method.
      getClass.send(m, *args)
    end

    # Convert a value returned by a call to the underlying Java object to the
    # appropriate Ruby object.
    #
    # If the value is a Java object (it responds to _classname), convert it
    # using a protected function with the name wrap_ followed by the
    # underlying object's classname with the Java path delimiters converted to
    # underscores.  For example, a <tt>java.util.ArrayList</tt> would be
    # converted by a function called wrap_java_util_ArrayList.
    #
    # If the value lacks the appropriate converter function, wrap it in a
    # generic JavaObjectWrapper.
    #
    # If the value is not a Java object, return it unchanged.
    #
    # This function is called recursively for every element in an Array.
    def wrap_java_object(object)
      if object.kind_of?(Array)
        object.map { |item| wrap_java_object(item) }
      elsif object.respond_to?(:_classname)
        # Ruby-Java Bridge Java objects all have a _classname member which
        # tells the name of their Java class.  Convert this to the
        # corresponding wrapper function name.
        wrapper_name = "wrap_#{object._classname.tr('.', '_')}".to_sym
        # The converters are protected, and respond_to? only reports
        # protected methods when its second argument is true.
        respond_to?(wrapper_name, true) ? send(wrapper_name, object) : JavaObjectWrapper.new(object)
      else
        object
      end
    end

    # Convert <tt>java.util.ArrayList</tt> objects to Ruby Array objects.
    def wrap_java_util_ArrayList(object)
      Array.new(object.size) { |i| wrap_java_object(object.get(i)) }
    end

    # Convert <tt>java.util.HashSet</tt> objects to Ruby Set objects.
    def wrap_java_util_HashSet(object)
      set = Set.new
      i = object.iterator
      set << wrap_java_object(i.next) while i.hasNext
      set
    end

    # Show the classname of the underlying Java object.
    def inspect
      "<#{@java_object._classname}>"
    end

    # Use the underlying Java object's stringification.
    def to_s
      toString
    end

    protected :wrap_java_object, :wrap_java_util_ArrayList, :wrap_java_util_HashSet

  end # JavaObjectWrapper

end # Rjb
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Specification for the stanfordparser gem: identity, packaged files and
# dependencies.  Each dependency is declared once below and registered through
# whichever API the running RubyGems version offers.
Gem::Specification.new do |s|
  s.name = %q{stanfordparser}
  s.version = "2.2.1.s"

  s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
  s.authors = ["John Wilkinson", "Bill McNeal"]
  s.date = %q{2010-06-21}
  s.description = %q{Ruby wrapper of the Stanford Parser, a NLP parser built in Java.}
  s.email = %q{jcwilk@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.orig",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.orig",
    "README.rdoc",
    "Rakefile",
    "VERSION.yml",
    "examples/connection_finder.rb",
    "examples/stanford-sentence-parser.rb",
    "lib/stanfordparser.rb",
    "lib/stanfordparser/java_object.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "spec/stanfordparser_spec.rb",
    "test/test_stanfordparser.rb"
  ]
  s.homepage = %q{http://github.com/jcwilk/stanfordparser}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{GitHub upload/extension of Bill McNeal's stanfordparser rubygem}
  s.test_files = [
    "spec/spec_helper.rb",
    "spec/stanfordparser_spec.rb",
    "test/test_stanfordparser.rb",
    "examples/connection_finder.rb",
    "examples/stanford-sentence-parser.rb"
  ]

  # Every dependency as [name, requirement, development-only?].
  dependencies = [
    [%q<rjb>,      ">= 1.2.5", false],
    [%q<treebank>, ">= 3.0.0", false],
    [%q<rspec>,    ">= 1.2.9", true]
  ]

  # Runtime/development dependencies are only distinguished when the
  # specification exposes specification_version and RubyGems is at least
  # 1.2.0; otherwise everything is registered through plain add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  dependencies.each do |name, requirement, development|
    if !typed_dependencies
      s.add_dependency(name, [requirement])
    elsif development
      s.add_development_dependency(name, [requirement])
    else
      s.add_runtime_dependency(name, [requirement])
    end
  end
end
|
69
|
+
|
@@ -0,0 +1,224 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
|
5
|
+
# Copyright 2007-2008 William Patrick McNeill
|
6
|
+
#
|
7
|
+
# This file is part of the Stanford Parser Ruby Wrapper.
|
8
|
+
#
|
9
|
+
# The Stanford Parser Ruby Wrapper is free software; you can redistribute it
|
10
|
+
# and/or modify it under the terms of the GNU General Public License as
|
11
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
12
|
+
# or (at your option) any later version.
|
13
|
+
#
|
14
|
+
# The Stanford Parser Ruby Wrapper is distributed in the hope that it will be
|
15
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
17
|
+
# Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License along with
|
20
|
+
# editalign; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
21
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
22
|
+
#
|
23
|
+
#++
|
24
|
+
|
25
|
+
# Test cases for the Stanford Parser module
|
26
|
+
|
27
|
+
require "test/unit"
|
28
|
+
require "set"
|
29
|
+
require "singleton"
|
30
|
+
require "stanfordparser"
|
31
|
+
|
32
|
+
|
33
|
+
# Tests for the parser returned by StanfordParser::DefaultParser.instance and
# for the tree it produces.  All assert_equal calls pass the expected value
# first so that failure messages label the two values correctly.
class LexicalizedParserTestCase < Test::Unit::TestCase
  def test_root_path
    assert_equal Pathname, StanfordParser::ROOT.class
  end

  # Parse a single sentence before every test.
  def setup
    @parser = StanfordParser::DefaultParser.instance
    @tree = @parser.apply("This is a sentence.")
  end

  def test_parser
    assert_equal StanfordParser::ROOT + "englishPCFG.ser.gz", @parser.grammar
    assert_equal StanfordParser::Tree, @tree.class
  end

  def test_localTrees
    # The following call exercises the conversion from java.util.HashSet
    # objects to Ruby sets.
    l = @tree.localTrees
    assert_equal 5, l.size
    # The original literal listed "NP" twice; a Set collapses duplicates, so
    # four distinct labels are expected for the five local trees.
    assert_equal Set.new(["S", "NP", "VP", "ROOT"]),
                 Set.new(l.collect {|t| "#{t.label}"})
  end

  def test_enumerable
    # StanfordParser::LexicalizedParser is not an enumerable object.
    assert_equal [], @parser.map
  end
end # LexicalizedParserTestCase
|
62
|
+
|
63
|
+
|
64
|
+
# Tests that a parsed StanfordParser::Tree can be enumerated node by node.
class TreeTestCase < Test::Unit::TestCase
  # Parse a single sentence before every test.
  def setup
    @parser = StanfordParser::DefaultParser.instance
    @tree = @parser.apply("This is a sentence.")
  end

  def test_enumerable
    # Every enumerated node is wrapped as a StanfordParser::Tree ...
    assert @tree.all? {|n| n.class == StanfordParser::Tree}
    # ... around one of the two Java tree node classes.
    assert @tree.all? {|n|
      n._classname == "edu.stanford.nlp.trees.LabeledScoredTreeNode" ||
        n._classname == "edu.stanford.nlp.trees.LabeledScoredTreeLeaf"
    }
    # Expected value first, so a failure reports expected/actual correctly.
    assert_equal ["ROOT", "S", "NP", "DT", "This", "VP", "VBZ", "is", "NP", "DT", "a",
                  "NN", "sentence", ".", "."],
                 @tree.map {|n| "#{n.label}"}
  end
end # TreeTestCase
|
81
|
+
|
82
|
+
|
83
|
+
# Tests the key accessors, put, inspect and to_s of StanfordParser::FeatureLabel.
class FeatureLabelTestCase < Test::Unit::TestCase
  def test_feature_label
    label = StanfordParser::FeatureLabel.new
    # [expected key string, accessor that returns the key, value stored under it]
    entries = [
      ["BEGIN_POS", :BEGIN_POSITION_KEY, 3],
      ["END_POS",   :END_POSITION_KEY,   7],
      ["current",   :CURRENT_KEY,        "word"]
    ]
    entries.each do |expected_key, accessor, value|
      assert_equal expected_key, label.send(accessor)
      label.put(label.send(accessor), value)
    end
    assert_equal "{BEGIN_POS=3, END_POS=7, current=word}", label.inspect
    assert_equal "word [3,7]", label.to_s
  end
end
|
96
|
+
|
97
|
+
|
98
|
+
# Tests for StanfordParser::DocumentPreprocessor and
# StanfordParser::StandoffDocumentPreprocessor.
class DocumentPreprocessorTestCase < Test::Unit::TestCase
  def setup
    @preproc = StanfordParser::DocumentPreprocessor.new
    @standoff_preproc = StanfordParser::StandoffDocumentPreprocessor.new
  end

  def test_get_sentences_from_string
    # The following call exercises the conversion from java.util.ArrayList
    # objects to Ruby arrays.
    s = @preproc.getSentencesFromString("This is a sentence. So is this.")
    assert_equal "This is a sentence .", "#{s[0]}"
    assert_equal "So is this .", "#{s[1]}"
  end

  def test_enumerable
    # StanfordParser::DocumentPreprocessor is not an enumerable object.
    assert_equal [], @preproc.map
  end

  # Segment and tokenize text containing two sentences.
  def test_standoff_document_preprocessor
    # NOTE(review): the offsets asserted below put "So" at 20, which implies
    # two characters between the sentences; confirm that the spacing of this
    # literal was not collapsed.
    sentences = @standoff_preproc.getSentencesFromString("He (John) is tall. So is she.")
    # Recognize two sentences.
    assert_equal 2, sentences.length
    assert sentences.all? {|sentence| sentence.instance_of? StanfordParser::StandoffSentence}
    assert_equal "He (John) is tall.", sentences.first.to_s
    assert_equal 7, sentences.first.length
    assert sentences[0].all? {|token| token.instance_of? StanfordParser::StandoffToken}
    assert_equal "So is she.", sentences.last.to_s
    assert_equal 4, sentences.last.length
    assert sentences[1].all? {|token| token.instance_of? StanfordParser::StandoffToken}
    # Get the correct token information for the first sentence.
    assert_tokens sentences[0], [
      ["He",   "He",    0,  2],
      ["(",    "-LRB-", 3,  4],
      ["John", "John",  4,  8],
      [")",    "-RRB-", 8,  9],
      ["is",   "is",    10, 12],
      ["tall", "tall",  13, 17],
      [".",    ".",     17, 18]
    ]
    # Get the correct token information for the second sentence.
    assert_tokens sentences[1], [
      ["So",  "So",  20, 22],
      ["is",  "is",  23, 25],
      ["she", "she", 26, 29],
      [".",   ".",   29, 30]
    ]
  end

  def test_stringification
    assert_equal "<DocumentPreprocessor>", @preproc.inspect
    assert_equal "<DocumentPreprocessor>", @preproc.to_s
    assert_equal "<StandoffDocumentPreprocessor>", @standoff_preproc.inspect
    assert_equal "<StandoffDocumentPreprocessor>", @standoff_preproc.to_s
  end

  # Check each token of <em>sentence</em> against a row of <em>expected</em>,
  # given as [current, word, begin_position, end_position].
  def assert_tokens(sentence, expected)
    expected.each_with_index do |(current, word, first, last), i|
      token = sentence[i]
      assert_equal [current, word], [token.current(), token.word()]
      assert_equal [first, last], [token.begin_position(), token.end_position()]
    end
  end
  private :assert_tokens

end # DocumentPreprocessorTestCase
|
163
|
+
|
164
|
+
|
165
|
+
# Tests for StanfordParser::StandoffParsedText on a two-sentence text.
class StandoffParsedTextTestCase < Test::Unit::TestCase
  def setup
    # NOTE(review): the leaf offsets checked in verify_parsed_text put "So" at
    # 20, which implies two characters between the sentences; confirm that the
    # spacing of this literal was not collapsed.
    @text = "He (John) is tall. So is she."
  end

  def test_parse_text_default_nodetype
    verify_parsed_text(StanfordParser::StandoffParsedText.new(@text),
                       StanfordParser::StandoffNode)
  end

  # Verify correct parsing with variable node types for text containing two sentences.
  def verify_parsed_text(parsed_text, nodetype)
    # Verify that there are two sentences.
    assert_equal 2, parsed_text.length
    assert parsed_text.all? {|sentence| sentence.instance_of? nodetype}

    # Expected leaf tokens per sentence, each as
    # [current, word, begin_position, end_position].
    expected_leaves = [
      [
        ["He",   "He",    0,  2],
        ["(",    "-LRB-", 3,  4],
        ["John", "John",  4,  8],
        [")",    "-RRB-", 8,  9],
        ["is",   "is",    10, 12],
        ["tall", "tall",  13, 17],
        [".",    ".",     17, 18]
      ],
      [
        ["So",  "So",  20, 22],
        ["is",  "is",  23, 25],
        ["she", "she", 26, 29],
        [".",   ".",   29, 30]
      ]
    ]
    # Verify the tokens in the leaf nodes of each sentence in turn.
    expected_leaves.each_with_index do |tokens, sentence_index|
      labels = parsed_text[sentence_index].leaves.collect {|node| node.label}
      tokens.each_with_index do |(current, word, first, last), i|
        assert_equal [current, word], [labels[i].current(), labels[i].word()]
        assert_equal [first, last], [labels[i].begin_position(), labels[i].end_position()]
      end
    end

    # Verify that the original string is recoverable.
    assert_equal "He (John) is tall. ", parsed_text[0].to_original_string
    assert_equal "So is she." , parsed_text[1].to_original_string
    # Draw < and > brackets around 3 constituents.
    constituents = [[0,0], [0,0,1,1], [0,1,1]]
    bracketed = parsed_text[0].to_bracketed_string(constituents, "<", ">")
    assert_equal "<He (<John>)> is <tall>. ", bracketed
  end
end
|
214
|
+
|
215
|
+
|
216
|
+
# Miscellaneous checks: the model location constant and Word comparison.
class MiscPreprocessorTestCase < Test::Unit::TestCase
  def test_model_location
    expected_location = "$(ROOT)/englishPCFG.ser.gz"
    assert_equal expected_location, StanfordParser::ENGLISH_PCFG_MODEL
  end

  def test_word
    # The Word is the receiver of ==, so its own comparison with a String is used.
    word = StanfordParser::Word.new("edu.stanford.nlp.ling.Word", "dog")
    assert(word == "dog")
  end
end # MiscPreprocessorTestCase
|
metadata
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stanfordparser-infochimps
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 12
|
5
|
+
prerelease: 6
|
6
|
+
segments:
|
7
|
+
- 2
|
8
|
+
- 2
|
9
|
+
- 1
|
10
|
+
- s
|
11
|
+
version: 2.2.1.s
|
12
|
+
platform: ruby
|
13
|
+
authors:
|
14
|
+
- John Wilkinson
|
15
|
+
- Bill McNeal
|
16
|
+
autorequire:
|
17
|
+
bindir: bin
|
18
|
+
cert_chain: []
|
19
|
+
|
20
|
+
date: 2011-01-27 00:00:00 -06:00
|
21
|
+
default_executable:
|
22
|
+
dependencies:
|
23
|
+
- !ruby/object:Gem::Dependency
|
24
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
requirement: *id001
|
34
|
+
prerelease: false
|
35
|
+
name: rjb
|
36
|
+
type: :runtime
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
requirement: *id002
|
48
|
+
prerelease: false
|
49
|
+
name: rake
|
50
|
+
type: :runtime
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
requirement: *id003
|
62
|
+
prerelease: false
|
63
|
+
name: jeweler
|
64
|
+
type: :runtime
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 7
|
72
|
+
segments:
|
73
|
+
- 3
|
74
|
+
- 0
|
75
|
+
- 0
|
76
|
+
version: 3.0.0
|
77
|
+
requirement: *id004
|
78
|
+
prerelease: false
|
79
|
+
name: treebank
|
80
|
+
type: :runtime
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 13
|
88
|
+
segments:
|
89
|
+
- 1
|
90
|
+
- 2
|
91
|
+
- 9
|
92
|
+
version: 1.2.9
|
93
|
+
requirement: *id005
|
94
|
+
prerelease: false
|
95
|
+
name: rspec
|
96
|
+
type: :runtime
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 21
|
104
|
+
segments:
|
105
|
+
- 1
|
106
|
+
- 2
|
107
|
+
- 5
|
108
|
+
version: 1.2.5
|
109
|
+
requirement: *id006
|
110
|
+
prerelease: false
|
111
|
+
name: rjb
|
112
|
+
type: :runtime
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
hash: 7
|
120
|
+
segments:
|
121
|
+
- 3
|
122
|
+
- 0
|
123
|
+
- 0
|
124
|
+
version: 3.0.0
|
125
|
+
requirement: *id007
|
126
|
+
prerelease: false
|
127
|
+
name: treebank
|
128
|
+
type: :runtime
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
hash: 13
|
136
|
+
segments:
|
137
|
+
- 1
|
138
|
+
- 2
|
139
|
+
- 9
|
140
|
+
version: 1.2.9
|
141
|
+
requirement: *id008
|
142
|
+
prerelease: false
|
143
|
+
name: rspec
|
144
|
+
type: :development
|
145
|
+
description: Ruby wrapper of the Stanford Parser, a NLP parser built in Java.
|
146
|
+
email: jcwilk@gmail.com
|
147
|
+
executables: []
|
148
|
+
|
149
|
+
extensions: []
|
150
|
+
|
151
|
+
extra_rdoc_files:
|
152
|
+
- LICENSE
|
153
|
+
- README.orig.rdoc
|
154
|
+
- README.rdoc
|
155
|
+
files:
|
156
|
+
- .document
|
157
|
+
- Gemfile
|
158
|
+
- LICENSE
|
159
|
+
- README.orig.rdoc
|
160
|
+
- README.rdoc
|
161
|
+
- Rakefile
|
162
|
+
- TESTS_STATUS.rdoc
|
163
|
+
- VERSION.yml
|
164
|
+
- examples/stanford-sentence-parser.rb
|
165
|
+
- lib/stanfordparser.rb
|
166
|
+
- lib/stanfordparser/java_object.rb
|
167
|
+
- stanfordparser.gemspec
|
168
|
+
- test/test_stanfordparser.rb
|
169
|
+
has_rdoc: true
|
170
|
+
homepage: http://github.com/jcwilk/stanfordparser
|
171
|
+
licenses: []
|
172
|
+
|
173
|
+
post_install_message:
|
174
|
+
rdoc_options: []
|
175
|
+
|
176
|
+
require_paths:
|
177
|
+
- lib
|
178
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
179
|
+
none: false
|
180
|
+
requirements:
|
181
|
+
- - ">="
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
hash: 3
|
184
|
+
segments:
|
185
|
+
- 0
|
186
|
+
version: "0"
|
187
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
188
|
+
none: false
|
189
|
+
requirements:
|
190
|
+
- - ">"
|
191
|
+
- !ruby/object:Gem::Version
|
192
|
+
hash: 25
|
193
|
+
segments:
|
194
|
+
- 1
|
195
|
+
- 3
|
196
|
+
- 1
|
197
|
+
version: 1.3.1
|
198
|
+
requirements: []
|
199
|
+
|
200
|
+
rubyforge_project:
|
201
|
+
rubygems_version: 1.4.2
|
202
|
+
signing_key:
|
203
|
+
specification_version: 3
|
204
|
+
summary: GitHub upload/extension of Bill McNeal's stanfordparser rubygem
|
205
|
+
test_files:
|
206
|
+
- examples/stanford-sentence-parser.rb
|
207
|
+
- test/test_stanfordparser.rb
|