stanford-core-nlp 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -0
- data/lib/stanford-core-nlp.rb +33 -6
- metadata +2 -2
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
This gem provides high-level Ruby bindings to the [Stanford Core NLP package](http://nlp.stanford.edu/software/corenlp.shtml), a set of natural language processing tools for tokenization, part-of-speech tagging, lemmatization, and parsing of several languages, as well as named entity recognition and coreference resolution in English. This gem is compatible with Ruby 1.9.2 and above.
|
6
6
|
|
7
|
+
If you are looking for a full-scale natural language processing framework in Ruby, have a look at [Treat](https://github.com/louismullie/treat).
|
8
|
+
|
7
9
|
**Installing**
|
8
10
|
|
9
11
|
First, install the gem: `gem install stanford-core-nlp`. Then, download the Stanford Core NLP JAR and model files. Three different packages are available:
|
data/lib/stanford-core-nlp.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module StanfordCoreNLP
|
2
2
|
|
3
|
-
VERSION = '0.3.0'
|
3
|
+
VERSION = '0.3.1'
|
4
4
|
|
5
5
|
require 'bind-it'
|
6
6
|
extend BindIt::Binding
|
@@ -9,9 +9,10 @@ module StanfordCoreNLP
|
|
9
9
|
# BindIt Configuration Options #
|
10
10
|
# ############################ #
|
11
11
|
|
12
|
-
# The path
|
13
|
-
#
|
14
|
-
self.jar_path = File.dirname(__FILE__)
|
12
|
+
# The default path for the JAR files
|
13
|
+
# is the gem's bin folder.
|
14
|
+
self.jar_path = File.dirname(__FILE__).
|
15
|
+
gsub('/lib', '') + '/bin/'
|
15
16
|
|
16
17
|
# Load the JVM with a minimum heap size of 512MB,
|
17
18
|
# and a maximum heap size of 1024MB.
|
@@ -24,6 +25,7 @@ module StanfordCoreNLP
|
|
24
25
|
self.default_jars = [
|
25
26
|
'joda-time.jar',
|
26
27
|
'xom.jar',
|
28
|
+
'stanford-parser.jar',
|
27
29
|
'stanford-corenlp.jar',
|
28
30
|
'bridge.jar'
|
29
31
|
]
|
@@ -42,7 +44,11 @@ module StanfordCoreNLP
|
|
42
44
|
|
43
45
|
# Default namespace is the Stanford pipeline namespace.
|
44
46
|
self.default_namespace = 'edu.stanford.nlp.pipeline'
|
45
|
-
|
47
|
+
|
48
|
+
# ########################### #
|
49
|
+
# Stanford Core NLP bindings #
|
50
|
+
# ########################### #
|
51
|
+
|
46
52
|
require 'stanford-core-nlp/config'
|
47
53
|
require 'stanford-core-nlp/bridge'
|
48
54
|
|
@@ -51,6 +57,8 @@ module StanfordCoreNLP
|
|
51
57
|
attr_accessor :model_files
|
52
58
|
# The folder in which to look for models.
|
53
59
|
attr_accessor :model_path
|
60
|
+
# Store the language currently being used.
|
61
|
+
attr_accessor :language
|
54
62
|
end
|
55
63
|
|
56
64
|
# The path to the main folder containing the folders
|
@@ -63,6 +71,7 @@ module StanfordCoreNLP
|
|
63
71
|
# code (e.g. :english, :eng or :en will work).
|
64
72
|
def self.use(language)
|
65
73
|
lang = nil
|
74
|
+
self.language = language
|
66
75
|
self.model_files = {}
|
67
76
|
Config::LanguageCodes.each do |l,codes|
|
68
77
|
lang = codes[2] if codes.include?(language)
|
@@ -99,8 +108,15 @@ module StanfordCoreNLP
|
|
99
108
|
# properties.
|
100
109
|
def self.load(*annotators)
|
101
110
|
|
111
|
+
# Take care of Windows users.
|
112
|
+
if self.running_on_windows?
|
113
|
+
self.jar_path.gsub!('/', '\\')
|
114
|
+
self.model_path.gsub!('/', '\\')
|
115
|
+
end
|
116
|
+
|
102
117
|
# Make the bindings.
|
103
118
|
self.bind
|
119
|
+
|
104
120
|
# Prepend the JAR path to the model files.
|
105
121
|
properties = {}
|
106
122
|
self.model_files.each do |k,v|
|
@@ -119,8 +135,14 @@ module StanfordCoreNLP
|
|
119
135
|
properties[k] = f
|
120
136
|
end
|
121
137
|
|
138
|
+
# Bug fix for French parser
|
139
|
+
if self.language == :french
|
140
|
+
properties['parser.flags'] = ''
|
141
|
+
end
|
142
|
+
|
122
143
|
properties['annotators'] =
|
123
144
|
annotators.map { |x| x.to_s }.join(', ')
|
145
|
+
|
124
146
|
CoreNLP.new(get_properties(properties))
|
125
147
|
end
|
126
148
|
|
@@ -132,7 +154,7 @@ module StanfordCoreNLP
|
|
132
154
|
end
|
133
155
|
props
|
134
156
|
end
|
135
|
-
|
157
|
+
|
136
158
|
# Get a Java ArrayList binding to pass lists
|
137
159
|
# of tokens to the Stanford Core NLP process.
|
138
160
|
def self.get_list(tokens)
|
@@ -143,4 +165,9 @@ module StanfordCoreNLP
|
|
143
165
|
list
|
144
166
|
end
|
145
167
|
|
168
|
+
# Returns true if we're running on Windows.
|
169
|
+
def self.running_on_windows?
|
170
|
+
RUBY_PLATFORM.split("-")[1] == 'mswin32'
|
171
|
+
end
|
172
|
+
|
146
173
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-core-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bind-it
|