stanford-core-nlp 0.3.2 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/bin/AnnotationBridge.java +12 -0
- data/bin/Stanford.java +13 -0
- data/lib/stanford-core-nlp.rb +6 -5
- data/lib/stanford-core-nlp/config.rb +7 -12
- metadata +5 -3
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
[![Build Status](https://secure.travis-ci.org/louismullie/stanford-core-nlp.png)](http://travis-ci.org/louismullie/stanford-core-nlp)
|
2
|
-
|
3
1
|
**About**
|
4
2
|
|
5
3
|
This gem provides high-level Ruby bindings to the [Stanford Core NLP package](http://nlp.stanford.edu/software/corenlp.shtml), a set of natural language processing tools for tokenization, part-of-speech tagging, lemmatization, and parsing of several languages, as well as named entity recognition and coreference resolution in English. This gem is compatible with Ruby 1.9.2 and above.
|
@@ -8,6 +6,8 @@ If you are looking for an full-scale natural language processing framework in Ru
|
|
8
6
|
|
9
7
|
**Installing**
|
10
8
|
|
9
|
+
_Note: This gem uses the Ruby-Java Bridge (Rjb), which currently does not support Java 7. Therefore, if you have installed Java 7, you should set your JAVA_HOME to point to your old Java 6 install before installing Rjb; for example, `export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk/"`._
|
10
|
+
|
11
11
|
First, install the gem: `gem install stanford-core-nlp`. Then, download the Stanford Core NLP JAR and model files. Three different packages are available:
|
12
12
|
|
13
13
|
* A [minimal package for English](http://louismullie.com/treat/stanford-core-nlp-minimal.zip) with one tagger model and one parser model for English.
|
data/bin/AnnotationBridge.java
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
import edu.stanford.nlp.ling.CoreAnnotation;
|
2
|
+
import edu.stanford.nlp.util.ArrayCoreMap;
|
3
|
+
|
4
|
+
public class AnnotationBridge {
|
5
|
+
|
6
|
+
public static Object getAnnotation(Object entity, String name) throws ClassNotFoundException {
|
7
|
+
Class<CoreAnnotation> klass;
|
8
|
+
klass = (Class<CoreAnnotation>) Class.forName(name);
|
9
|
+
Object object = ((ArrayCoreMap) entity).get(klass);
|
10
|
+
return object;
|
11
|
+
}
|
12
|
+
}
|
data/bin/Stanford.java
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
import edu.stanford.nlp.ling.CoreAnnotation;
|
2
|
+
import edu.stanford.nlp.ling.CoreLabel;
|
3
|
+
|
4
|
+
public class Stanford {
|
5
|
+
|
6
|
+
public static Object getAnnotation(CoreLabel entity, String name) throws ClassNotFoundException{
|
7
|
+
Class<CoreAnnotation> klass;
|
8
|
+
klass = (Class<CoreAnnotation>) Class.forName(name);
|
9
|
+
Object object = entity.get(klass);
|
10
|
+
return object;
|
11
|
+
}
|
12
|
+
|
13
|
+
}
|
data/lib/stanford-core-nlp.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module StanfordCoreNLP
|
2
2
|
|
3
|
-
VERSION = '0.3.2'
|
3
|
+
VERSION = '0.3.4'
|
4
4
|
|
5
5
|
require 'bind-it'
|
6
6
|
extend BindIt::Binding
|
@@ -12,7 +12,7 @@ module StanfordCoreNLP
|
|
12
12
|
# The default path for the JAR files
|
13
13
|
# is the gem's bin folder.
|
14
14
|
self.jar_path = File.dirname(__FILE__).
|
15
|
-
gsub(
|
15
|
+
gsub(/\/lib\z/, '') + '/bin/'
|
16
16
|
|
17
17
|
# Load the JVM with a minimum heap size of 512MB,
|
18
18
|
# and a maximum heap size of 1024MB.
|
@@ -94,7 +94,7 @@ module StanfordCoreNLP
|
|
94
94
|
end
|
95
95
|
|
96
96
|
# Use english by default.
|
97
|
-
self.use
|
97
|
+
self.use :english
|
98
98
|
|
99
99
|
# Set a model file.
|
100
100
|
def self.set_model(name, file)
|
@@ -135,10 +135,11 @@ module StanfordCoreNLP
|
|
135
135
|
properties[k] = f
|
136
136
|
end
|
137
137
|
|
138
|
-
# Bug fix for French parser due to Stanford bug
|
138
|
+
# Bug fix for French/German parser due to Stanford bug.
|
139
139
|
# Otherwise throws IllegalArgumentException:
|
140
140
|
# Unknown option: -retainTmpSubcategories
|
141
|
-
if self.language == :french
|
141
|
+
if self.language == :french ||
|
142
|
+
self.language == :german
|
142
143
|
properties['parser.flags'] = ''
|
143
144
|
end
|
144
145
|
|
data/lib/stanford-core-nlp/config.rb
CHANGED
@@ -9,8 +9,7 @@ module StanfordCoreNLP
|
|
9
9
|
:german => [:de, :ger, :german],
|
10
10
|
:french => [:fr, :fre, :french],
|
11
11
|
:arabic => [:ar, :ara, :arabic],
|
12
|
-
:chinese => [:ch, :chi, :chinese]
|
13
|
-
:xinhua => [:xi, :xin, :xinhua]
|
12
|
+
:chinese => [:ch, :chi, :chinese]
|
14
13
|
}
|
15
14
|
|
16
15
|
# Folders inside the JAR path for the models.
|
@@ -37,8 +36,7 @@ module StanfordCoreNLP
|
|
37
36
|
:german => 'german-fast.tagger',
|
38
37
|
:french => 'french.tagger',
|
39
38
|
:arabic => 'arabic-fast.tagger',
|
40
|
-
:chinese => 'chinese.tagger'
|
41
|
-
:xinhua => nil
|
39
|
+
:chinese => 'chinese.tagger'
|
42
40
|
},
|
43
41
|
|
44
42
|
:parser => {
|
@@ -46,8 +44,7 @@ module StanfordCoreNLP
|
|
46
44
|
:german => 'germanPCFG.ser.gz',
|
47
45
|
:french => 'frenchFactored.ser.gz',
|
48
46
|
:arabic => 'arabicFactored.ser.gz',
|
49
|
-
:chinese => 'chinesePCFG.ser.gz'
|
50
|
-
:xinhua => 'xinhuaPCFG.ser.gz'
|
47
|
+
:chinese => 'chinesePCFG.ser.gz'
|
51
48
|
},
|
52
49
|
|
53
50
|
:ner => {
|
@@ -59,8 +56,7 @@ module StanfordCoreNLP
|
|
59
56
|
:german => {},
|
60
57
|
:french => {},
|
61
58
|
:arabic => {},
|
62
|
-
:chinese => {}
|
63
|
-
:xinhua => {}
|
59
|
+
:chinese => {}
|
64
60
|
},
|
65
61
|
|
66
62
|
:dcoref => {
|
@@ -74,15 +70,14 @@ module StanfordCoreNLP
|
|
74
70
|
'plural' => 'plural.unigrams.txt',
|
75
71
|
'singular' => 'singular.unigrams.txt',
|
76
72
|
'states' => 'state-abbreviations.txt',
|
77
|
-
'countries' => '
|
78
|
-
'states.provinces' => '
|
73
|
+
'countries' => 'countries',
|
74
|
+
'states.provinces' => 'statesandprovinces',
|
79
75
|
'extra.gender' => 'namegender.combine.txt'
|
80
76
|
},
|
81
77
|
:german => {},
|
82
78
|
:french => {},
|
83
79
|
:arabic => {},
|
84
|
-
:chinese => {}
|
85
|
-
:xinhua => {}
|
80
|
+
:chinese => {}
|
86
81
|
}
|
87
82
|
|
88
83
|
# Models to add.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-core-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bind-it
|
@@ -40,7 +40,9 @@ files:
|
|
40
40
|
- lib/stanford-core-nlp/bridge.rb
|
41
41
|
- lib/stanford-core-nlp/config.rb
|
42
42
|
- lib/stanford-core-nlp.rb
|
43
|
+
- bin/AnnotationBridge.java
|
43
44
|
- bin/bridge.jar
|
45
|
+
- bin/Stanford.java
|
44
46
|
- README.md
|
45
47
|
- LICENSE
|
46
48
|
homepage: https://github.com/louismullie/stanford-core-nlp
|
@@ -63,7 +65,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
65
|
version: '0'
|
64
66
|
requirements: []
|
65
67
|
rubyforge_project:
|
66
|
-
rubygems_version: 1.8.
|
68
|
+
rubygems_version: 1.8.24
|
67
69
|
signing_key:
|
68
70
|
specification_version: 3
|
69
71
|
summary: Ruby bindings to the Stanford Core NLP tools.
|