stanford-core-nlp 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -0
- data/lib/stanford-core-nlp.rb +33 -6
- metadata +2 -2
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
This gem provides high-level Ruby bindings to the [Stanford Core NLP package](http://nlp.stanford.edu/software/corenlp.shtml), a set of natural language processing tools for tokenization, part-of-speech tagging, lemmatization, and parsing of several languages, as well as named entity recognition and coreference resolution in English. This gem is compatible with Ruby 1.9.2 and above.
|
6
6
|
|
7
|
+
If you are looking for a full-scale natural language processing framework in Ruby, have a look at [Treat](https://github.com/louismullie/treat).
|
8
|
+
|
7
9
|
**Installing**
|
8
10
|
|
9
11
|
First, install the gem: `gem install stanford-core-nlp`. Then, download the Stanford Core NLP JAR and model files. Three different packages are available:
|
data/lib/stanford-core-nlp.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module StanfordCoreNLP
|
2
2
|
|
3
|
-
VERSION = '0.3.0'
|
3
|
+
VERSION = '0.3.1'
|
4
4
|
|
5
5
|
require 'bind-it'
|
6
6
|
extend BindIt::Binding
|
@@ -9,9 +9,10 @@ module StanfordCoreNLP
|
|
9
9
|
# BindIt Configuration Options #
|
10
10
|
# ############################ #
|
11
11
|
|
12
|
-
# The path
|
13
|
-
#
|
14
|
-
self.jar_path = File.dirname(__FILE__)
|
12
|
+
# The default path for the JAR files
|
13
|
+
# is the gem's bin folder.
|
14
|
+
self.jar_path = File.dirname(__FILE__).
|
15
|
+
gsub('/lib', '') + '/bin/'
|
15
16
|
|
16
17
|
# Load the JVM with a minimum heap size of 512MB,
|
17
18
|
# and a maximum heap size of 1024MB.
|
@@ -24,6 +25,7 @@ module StanfordCoreNLP
|
|
24
25
|
self.default_jars = [
|
25
26
|
'joda-time.jar',
|
26
27
|
'xom.jar',
|
28
|
+
'stanford-parser.jar',
|
27
29
|
'stanford-corenlp.jar',
|
28
30
|
'bridge.jar'
|
29
31
|
]
|
@@ -42,7 +44,11 @@ module StanfordCoreNLP
|
|
42
44
|
|
43
45
|
# Default namespace is the Stanford pipeline namespace.
|
44
46
|
self.default_namespace = 'edu.stanford.nlp.pipeline'
|
45
|
-
|
47
|
+
|
48
|
+
# ########################### #
|
49
|
+
# Stanford Core NLP bindings #
|
50
|
+
# ########################### #
|
51
|
+
|
46
52
|
require 'stanford-core-nlp/config'
|
47
53
|
require 'stanford-core-nlp/bridge'
|
48
54
|
|
@@ -51,6 +57,8 @@ module StanfordCoreNLP
|
|
51
57
|
attr_accessor :model_files
|
52
58
|
# The folder in which to look for models.
|
53
59
|
attr_accessor :model_path
|
60
|
+
# Store the language currently being used.
|
61
|
+
attr_accessor :language
|
54
62
|
end
|
55
63
|
|
56
64
|
# The path to the main folder containing the folders
|
@@ -63,6 +71,7 @@ module StanfordCoreNLP
|
|
63
71
|
# code (e.g. :english, :eng or :en will work).
|
64
72
|
def self.use(language)
|
65
73
|
lang = nil
|
74
|
+
self.language = language
|
66
75
|
self.model_files = {}
|
67
76
|
Config::LanguageCodes.each do |l,codes|
|
68
77
|
lang = codes[2] if codes.include?(language)
|
@@ -99,8 +108,15 @@ module StanfordCoreNLP
|
|
99
108
|
# properties.
|
100
109
|
def self.load(*annotators)
|
101
110
|
|
111
|
+
# Take care of Windows users.
|
112
|
+
if self.running_on_windows?
|
113
|
+
self.jar_path.gsub!('/', '\\')
|
114
|
+
self.model_path.gsub!('/', '\\')
|
115
|
+
end
|
116
|
+
|
102
117
|
# Make the bindings.
|
103
118
|
self.bind
|
119
|
+
|
104
120
|
# Prepend the JAR path to the model files.
|
105
121
|
properties = {}
|
106
122
|
self.model_files.each do |k,v|
|
@@ -119,8 +135,14 @@ module StanfordCoreNLP
|
|
119
135
|
properties[k] = f
|
120
136
|
end
|
121
137
|
|
138
|
+
# Bug fix for French parser
|
139
|
+
if self.language == :french
|
140
|
+
properties['parser.flags'] = ''
|
141
|
+
end
|
142
|
+
|
122
143
|
properties['annotators'] =
|
123
144
|
annotators.map { |x| x.to_s }.join(', ')
|
145
|
+
|
124
146
|
CoreNLP.new(get_properties(properties))
|
125
147
|
end
|
126
148
|
|
@@ -132,7 +154,7 @@ module StanfordCoreNLP
|
|
132
154
|
end
|
133
155
|
props
|
134
156
|
end
|
135
|
-
|
157
|
+
|
136
158
|
# Get a Java ArrayList binding to pass lists
|
137
159
|
# of tokens to the Stanford Core NLP process.
|
138
160
|
def self.get_list(tokens)
|
@@ -143,4 +165,9 @@ module StanfordCoreNLP
|
|
143
165
|
list
|
144
166
|
end
|
145
167
|
|
168
|
+
# Returns true if we're running on Windows.
|
169
|
+
def self.running_on_windows?
|
170
|
+
RUBY_PLATFORM.split("-")[1] == 'mswin32'
|
171
|
+
end
|
172
|
+
|
146
173
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-core-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bind-it
|