stanford-core-nlp 0.3.2 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,5 +1,3 @@
1
- [![Build Status](https://secure.travis-ci.org/louismullie/stanford-core-nlp.png)](http://travis-ci.org/louismullie/stanford-core-nlp)
2
-
3
1
  **About**
4
2
 
5
3
  This gem provides high-level Ruby bindings to the [Stanford Core NLP package](http://nlp.stanford.edu/software/corenlp.shtml), a set of natural language processing tools for tokenization, part-of-speech tagging, lemmatization, and parsing of several languages, as well as named entity recognition and coreference resolution in English. This gem is compatible with Ruby 1.9.2 and above.
@@ -8,6 +6,8 @@ If you are looking for an full-scale natural language processing framework in Ru
8
6
 
9
7
  **Installing**
10
8
 
9
+ _Note: This gem uses the Ruby-Java Bridge (Rjb), which currently does not support Java 7. Therefore, if you have installed Java 7, you should set your JAVA_HOME to point to your old Java 6 install before installing Rjb; for example, `export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk/"`._
10
+
11
11
  First, install the gem: `gem install stanford-core-nlp`. Then, download the Stanford Core NLP JAR and model files. Three different packages are available:
12
12
 
13
13
  * A [minimal package for English](http://louismullie.com/treat/stanford-core-nlp-minimal.zip) with one tagger model and one parser model for English.
@@ -0,0 +1,12 @@
1
+ import edu.stanford.nlp.ling.CoreAnnotation;
2
+ import edu.stanford.nlp.util.ArrayCoreMap;
3
+
4
+ public class AnnotationBridge {
5
+
6
+ public static Object getAnnotation(Object entity, String name) throws ClassNotFoundException {
7
+ Class<CoreAnnotation> klass;
8
+ klass = (Class<CoreAnnotation>) Class.forName(name);
9
+ Object object = ((ArrayCoreMap) entity).get(klass);
10
+ return object;
11
+ }
12
+ }
data/bin/Stanford.java ADDED
@@ -0,0 +1,13 @@
1
+ import edu.stanford.nlp.ling.CoreAnnotation;
2
+ import edu.stanford.nlp.ling.CoreLabel;
3
+
4
+ public class Stanford {
5
+
6
+ public static Object getAnnotation(CoreLabel entity, String name) throws ClassNotFoundException{
7
+ Class<CoreAnnotation> klass;
8
+ klass = (Class<CoreAnnotation>) Class.forName(name);
9
+ Object object = entity.get(klass);
10
+ return object;
11
+ }
12
+
13
+ }
@@ -1,6 +1,6 @@
1
1
  module StanfordCoreNLP
2
2
 
3
- VERSION = '0.3.2'
3
+ VERSION = '0.3.4'
4
4
 
5
5
  require 'bind-it'
6
6
  extend BindIt::Binding
@@ -12,7 +12,7 @@ module StanfordCoreNLP
12
12
  # The default path for the JAR files
13
13
  # is the gem's bin folder.
14
14
  self.jar_path = File.dirname(__FILE__).
15
- gsub('/lib', '') + '/bin/'
15
+ gsub(/\/lib\z/, '') + '/bin/'
16
16
 
17
17
  # Load the JVM with a minimum heap size of 512MB,
18
18
  # and a maximum heap size of 1024MB.
@@ -94,7 +94,7 @@ module StanfordCoreNLP
94
94
  end
95
95
 
96
96
  # Use english by default.
97
- self.use(:english)
97
+ self.use :english
98
98
 
99
99
  # Set a model file.
100
100
  def self.set_model(name, file)
@@ -135,10 +135,11 @@ module StanfordCoreNLP
135
135
  properties[k] = f
136
136
  end
137
137
 
138
- # Bug fix for French parser due to Stanford bug
138
+ # Bug fix for French/German parser due to Stanford bug.
139
139
  # Otherwise throws IllegalArgumentException:
140
140
  # Unknown option: -retainTmpSubcategories
141
- if self.language == :french
141
+ if self.language == :french ||
142
+ self.language == :german
142
143
  properties['parser.flags'] = ''
143
144
  end
144
145
 
@@ -9,8 +9,7 @@ module StanfordCoreNLP
9
9
  :german => [:de, :ger, :german],
10
10
  :french => [:fr, :fre, :french],
11
11
  :arabic => [:ar, :ara, :arabic],
12
- :chinese => [:ch, :chi, :chinese],
13
- :xinhua => [:xi, :xin, :xinhua]
12
+ :chinese => [:ch, :chi, :chinese]
14
13
  }
15
14
 
16
15
  # Folders inside the JAR path for the models.
@@ -37,8 +36,7 @@ module StanfordCoreNLP
37
36
  :german => 'german-fast.tagger',
38
37
  :french => 'french.tagger',
39
38
  :arabic => 'arabic-fast.tagger',
40
- :chinese => 'chinese.tagger',
41
- :xinhua => nil
39
+ :chinese => 'chinese.tagger'
42
40
  },
43
41
 
44
42
  :parser => {
@@ -46,8 +44,7 @@ module StanfordCoreNLP
46
44
  :german => 'germanPCFG.ser.gz',
47
45
  :french => 'frenchFactored.ser.gz',
48
46
  :arabic => 'arabicFactored.ser.gz',
49
- :chinese => 'chinesePCFG.ser.gz',
50
- :xinhua => 'xinhuaPCFG.ser.gz'
47
+ :chinese => 'chinesePCFG.ser.gz'
51
48
  },
52
49
 
53
50
  :ner => {
@@ -59,8 +56,7 @@ module StanfordCoreNLP
59
56
  :german => {},
60
57
  :french => {},
61
58
  :arabic => {},
62
- :chinese => {},
63
- :xinhua => {}
59
+ :chinese => {}
64
60
  },
65
61
 
66
62
  :dcoref => {
@@ -74,15 +70,14 @@ module StanfordCoreNLP
74
70
  'plural' => 'plural.unigrams.txt',
75
71
  'singular' => 'singular.unigrams.txt',
76
72
  'states' => 'state-abbreviations.txt',
77
- 'countries' => 'unknown.txt', # Fix - can somebody provide this file?
78
- 'states.provinces' => 'unknown.txt', # Fix - can somebody provide this file?
73
+ 'countries' => 'countries',
74
+ 'states.provinces' => 'statesandprovinces',
79
75
  'extra.gender' => 'namegender.combine.txt'
80
76
  },
81
77
  :german => {},
82
78
  :french => {},
83
79
  :arabic => {},
84
- :chinese => {},
85
- :xinhua => {}
80
+ :chinese => {}
86
81
  }
87
82
 
88
83
  # Models to add.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stanford-core-nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-15 00:00:00.000000000 Z
12
+ date: 2012-12-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bind-it
@@ -40,7 +40,9 @@ files:
40
40
  - lib/stanford-core-nlp/bridge.rb
41
41
  - lib/stanford-core-nlp/config.rb
42
42
  - lib/stanford-core-nlp.rb
43
+ - bin/AnnotationBridge.java
43
44
  - bin/bridge.jar
45
+ - bin/Stanford.java
44
46
  - README.md
45
47
  - LICENSE
46
48
  homepage: https://github.com/louismullie/stanford-core-nlp
@@ -63,7 +65,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
65
  version: '0'
64
66
  requirements: []
65
67
  rubyforge_project:
66
- rubygems_version: 1.8.21
68
+ rubygems_version: 1.8.24
67
69
  signing_key:
68
70
  specification_version: 3
69
71
  summary: Ruby bindings to the Stanford Core NLP tools.