open-nlp 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -2,12 +2,10 @@
2
2
 
3
3
  ###About
4
4
 
5
- This library provides high-level Ruby bindings to the Open NLP package, a Java machine learning toolkit for natural language processing (NLP).
5
+ This library provides high-level Ruby bindings to the Open NLP package, a Java machine learning toolkit for natural language processing (NLP). This gem is compatible with Ruby 1.9.2 and 1.9.3 as well as JRuby 1.7.1. It is tested on both Java 6 and Java 7.
6
6
 
7
7
  ###Installing
8
8
 
9
- __Note: If you are running on MRI, this gem will use the Ruby-Java Bridge (Rjb), which currently does not support Java 7. Therefore, if you have installed Java 7, you should set your JAVA_HOME to point to your old Java 6 install before installing Rjb; for example, `export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk/"`.__
10
-
11
9
  First, install the gem: `gem install open-nlp`. Then, individually download the appropriate models from the [open-nlp website](http://opennlp.sourceforge.net/models-1.5/) or just get [all English language models](http://louismullie.com/treat/open-nlp-english.zip) in one package (80 MB).
12
10
 
13
11
  Place the contents of the extracted archive inside the /bin/ folder of the open-nlp gem (e.g. [...]/gems/open-nlp-0.x.x/bin/).
@@ -39,6 +37,8 @@ OpenNLP.jvm_args = ['-option1', '-option2']
39
37
  # Redirect VM output to log.txt
40
38
  OpenNLP.log_file = 'log.txt'
41
39
 
40
+ # Set default models for a language.
41
+ OpenNLP.use :language
42
42
  ```
43
43
 
44
44
  ###Examples
Binary file
@@ -0,0 +1,107 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+
3
+ <!--
4
+ Licensed to the Apache Software Foundation (ASF) under one
5
+ or more contributor license agreements. See the NOTICE file
6
+ distributed with this work for additional information
7
+ regarding copyright ownership. The ASF licenses this file
8
+ to you under the Apache License, Version 2.0 (the
9
+ "License"); you may not use this file except in compliance
10
+ with the License. You may obtain a copy of the License at
11
+
12
+ http://www.apache.org/licenses/LICENSE-2.0
13
+
14
+ Unless required by applicable law or agreed to in writing,
15
+ software distributed under the License is distributed on an
16
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17
+ KIND, either express or implied. See the License for the
18
+ specific language governing permissions and limitations
19
+ under the License.
20
+ -->
21
+
22
+ <dictionary>
23
+ <entry operation="RIGHT_LEFT_MATCHING">
24
+ <token>"</token>
25
+ </entry>
26
+ <entry operation="RIGHT_LEFT_MATCHING">
27
+ <token>'</token>
28
+ </entry>
29
+ <entry operation="MOVE_LEFT">
30
+ <token>.</token>
31
+ </entry>
32
+ <entry operation="MOVE_LEFT">
33
+ <token>?</token>
34
+ </entry>
35
+ <entry operation="MOVE_LEFT">
36
+ <token>!</token>
37
+ </entry>
38
+ <entry operation="MOVE_LEFT">
39
+ <token>,</token>
40
+ </entry>
41
+ <entry operation="MOVE_LEFT">
42
+ <token>;</token>
43
+ </entry>
44
+ <entry operation="MOVE_LEFT">
45
+ <token>:</token>
46
+ </entry>
47
+ <entry operation="MOVE_RIGHT">
48
+ <token>(</token>
49
+ </entry>
50
+ <entry operation="MOVE_LEFT">
51
+ <token>)</token>
52
+ </entry>
53
+ <entry operation="MOVE_LEFT">
54
+ <token>}</token>
55
+ </entry>
56
+ <entry operation="MOVE_RIGHT">
57
+ <token>{</token>
58
+ </entry>
59
+ <entry operation="MOVE_LEFT">
60
+ <token>]</token>
61
+ </entry>
62
+ <entry operation="MOVE_RIGHT">
63
+ <token>[</token>
64
+ </entry>
65
+ <entry operation="MOVE_RIGHT">
66
+ <token>``</token>
67
+ </entry>
68
+ <entry operation="MOVE_LEFT">
69
+ <token>''</token>
70
+ </entry>
71
+ <entry operation="MOVE_LEFT">
72
+ <token>%</token>
73
+ </entry>
74
+ <entry operation="MOVE_LEFT">
75
+ <token>n't</token>
76
+ </entry>
77
+ <entry operation="MOVE_LEFT">
78
+ <token>'ve</token>
79
+ </entry>
80
+ <entry operation="MOVE_LEFT">
81
+ <token>'d</token>
82
+ </entry>
83
+ <entry operation="MOVE_LEFT">
84
+ <token>'ll</token>
85
+ </entry>
86
+ <entry operation="MOVE_LEFT">
87
+ <token>'s</token>
88
+ </entry>
89
+ <entry operation="MOVE_LEFT">
90
+ <token>'re</token>
91
+ </entry>
92
+ <entry operation="MOVE_LEFT">
93
+ <token>'m</token>
94
+ </entry>
95
+ <entry operation="MOVE_LEFT">
96
+ <token>.org</token>
97
+ </entry>
98
+ <entry operation="MOVE_LEFT">
99
+ <token>.com</token>
100
+ </entry>
101
+ <entry operation="MOVE_LEFT">
102
+ <token>.net</token>
103
+ </entry>
104
+ <entry operation="MOVE_RIGHT">
105
+ <token>#</token>
106
+ </entry>
107
+ </dictionary>
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,7 +1,7 @@
1
1
  module OpenNLP
2
2
 
3
3
  # Library version.
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.2'
5
5
 
6
6
  # Require Java bindings.
7
7
  require 'open-nlp/bindings'
@@ -30,18 +30,34 @@ module OpenNLP::Config
30
30
  english: 'en-detokenizer.xml'
31
31
  },
32
32
  # Intentionally left empty.
33
+ # Available for English, Spanish, Dutch.
33
34
  name_finder: {},
34
35
  parser: {
35
36
  english: 'en-parser-chunking.bin'
36
37
  },
37
38
  pos_tagger: {
38
- english: 'en-pos-maxent.bin'
39
+ english: 'en-pos-maxent.bin',
40
+ danish: 'da-pos-maxent.bin',
41
+ german: 'de-pos-maxent.bin',
42
+ dutch: 'nl-pos-maxent.bin',
43
+ portuguese: 'pt-pos-maxent.bin',
44
+ swedish: 'se-pos-maxent.bin'
39
45
  },
40
46
  sentence_detector: {
41
- english: 'en-sent.bin'
47
+ english: 'en-sent.bin',
48
+ german: 'de-sent.bin',
49
+ danish: 'da-sent.bin',
50
+ dutch: 'nl-sent.bin',
51
+ portuguese: 'pt-sent.bin',
52
+ swedish: 'se-sent.bin'
42
53
  },
43
54
  tokenizer: {
44
- english: 'en-token.bin'
55
+ english: 'en-token.bin',
56
+ danish: 'da-token.bin',
57
+ german: 'de-token.bin',
58
+ dutch: 'nl-token.bin',
59
+ portuguese: 'pt-token.bin',
60
+ swedish: 'se-token.bin'
45
61
  }
46
62
  }
47
63
 
@@ -29,8 +29,8 @@ describe OpenNLP do
29
29
 
30
30
  OpenNLP.models[:pos_tagger].get_pos_model.to_s
31
31
  .index('opennlp.perceptron.PerceptronModel').should_not be_nil
32
-
33
- tags.should eql ["DT", "NN", "IN", "DT", "NN", "VBD", "VBN", "IN", "PRP$", "NNS", "."]
32
+
33
+ tags.to_a.should eql ["DT", "NN", "IN", "DT", "NN", "VBD", "VBN", "IN", "PRP$", "NNS", "."]
34
34
 
35
35
  end
36
36
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: open-nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,24 +9,24 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-22 00:00:00.000000000 Z
12
+ date: 2012-12-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bind-it
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
- - - ! '>='
19
+ - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: '0'
21
+ version: 0.2.5
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
- - - ! '>='
27
+ - - ~>
28
28
  - !ruby/object:Gem::Version
29
- version: '0'
29
+ version: 0.2.5
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
@@ -51,6 +51,20 @@ executables: []
51
51
  extensions: []
52
52
  extra_rdoc_files: []
53
53
  files:
54
+ - bin/en-chunker.bin
55
+ - bin/en-detokenizer.xml
56
+ - bin/en-ner-date.bin
57
+ - bin/en-ner-location.bin
58
+ - bin/en-ner-money.bin
59
+ - bin/en-ner-organization.bin
60
+ - bin/en-ner-percentage.bin
61
+ - bin/en-ner-person.bin
62
+ - bin/en-ner-time.bin
63
+ - bin/en-parser-chunking.bin
64
+ - bin/en-pos-maxent.bin
65
+ - bin/en-pos-perceptron.bin
66
+ - bin/en-sent.bin
67
+ - bin/en-token.bin
54
68
  - bin/jwnl-1.3.3.jar
55
69
  - bin/opennlp-maxent-3.0.2-incubating.jar
56
70
  - bin/opennlp-tools-1.5.2-incubating.jar