nlp_toolz 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 71916455cffe07c8464fb8cc1543d7b8a2ea7205
4
+ data.tar.gz: bc30072b7d62770c3e202e0545137056fe5a6164
5
+ SHA512:
6
+ metadata.gz: 997d3fc4fb5d9c18546e1ea4c5c8acd19e61ef6979ece0d27cff540cea99c2ecae094fba16a4c3aa25dc05f1fe9282498c228a898b68b4271e493027663e0ba3
7
+ data.tar.gz: 42d5ea917f3febe6484a80ab085f0b41515540f841edc2de4b219d06456d7d331a750fb306095336918b4c82f4cd184d1dc6099cd4ff0fd51e2cb487adab9944
data/.gitignore ADDED
@@ -0,0 +1,28 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ .rvmrc
20
+
21
+ ToDo.task
22
+
23
+ teste.rb
24
+
25
+ .DS_Store
26
+ test-data/
27
+ jars/*
28
+ models/*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in nlp_toolz.gemspec
4
+ gemspec
5
+
6
+ # gem 'birch', git: 'git://github.com/louismullie/birch.git'
data/Guardfile ADDED
@@ -0,0 +1,13 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :bundler do
5
+ watch('Gemfile')
6
+ watch(/^.+\.gemspec/)
7
+ end
8
+
9
+ guard :rspec do
10
+ watch(%r{^spec/.+_spec\.rb$})
11
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
12
+ watch('spec/spec_helper.rb') { "spec" }
13
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 LeFnord
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # NlpToolz
2
+
3
+ Basic NLP tools, mostly based on [OpenNLP](http://opennlp.apache.org), at this time `sentence finder`, `tokenizer` and `POS tagger` implemented, plus [Berkeley Parser](http://code.google.com/p/berkeleyparser/).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'nlp_toolz'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install nlp_toolz
18
+
19
+ Download jars and model files from [Dropbox](https://www.dropbox.com/sh/1layyjgf5h0wwi3/s2SHAnfVhs) and unzip it in gem folder.
20
+
21
+ ## Usage
22
+
23
+ see: [nlp_toolz.rb](https://github.com/LeFnord/nlp_toolz/blob/master/lib/nlp_toolz.rb) and specs for usage
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create new Pull Request
32
+
33
+ ## Comments
34
+
35
+ - removed Celluloid, do concurrency in your app, where it is used
36
+ - check `load_jars` for JVM parameters
37
+
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require "awesome_print"
4
+ require 'rspec/core'
5
+ require 'rspec/core/rake_task'
6
+ RSpec::Core::RakeTask.new(:spec) do |spec|
7
+ spec.pattern = FileList['spec/**/*_spec.rb']
8
+ end
9
+
10
+ task :default => :spec
11
+
12
+ require 'yard'
13
+ YARD::Rake::YardocTask.new
14
+
15
+ Dir["lib/tasks/**/*.rake"].sort.each { |ext| load ext }
data/bin/nlp_toolz ADDED
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'gli'
4
+ begin # XXX: Remove this begin/rescue before distributing your app
5
+ require 'nlp_toolz'
6
+ rescue LoadError
7
+ STDERR.puts "In development, you need to use `bundle exec bin/nlp_toolz` to run your app"
8
+ STDERR.puts "At install-time, RubyGems will make sure lib, etc. are in the load path"
9
+ STDERR.puts "Feel free to remove this message from bin/NlpToolz now"
10
+ exit 64
11
+ end
12
+
13
+ include GLI::App
14
+
15
+ # helper methods
16
+ def get_out(this)
17
+ ap this if $stdout.tty?
18
+ $stdout.puts this unless $stdout.tty?
19
+ end
20
+
21
+ def get_in(input_arg)
22
+ if File.exists?(input_arg) && !File.directory?(input_arg)
23
+ return get_file(input_arg)
24
+ else
25
+ return input_arg
26
+ end
27
+ end
28
+
29
+ def get_file(name)
30
+ file = File.open(name).gets(nil)
31
+ "" if file.nil?
32
+ file.force_encoding("utf-8") unless file.nil?
33
+ end
34
+
35
+
36
+ program_desc 'running basic NLP tasks'
37
+
38
+ version NlpToolz::VERSION
39
+
40
+ desc 'sentence detection'
41
+ arg_name 'Describe arguments to sent here'
42
+ command :sent do |c|
43
+ c.desc 'file input'
44
+ c.arg_name '<path/to/file>'
45
+ c.flag [:f,:file]
46
+ c.action do |global_options,options,args|
47
+ input = get_in(options[:f] || args.first)
48
+ get_out NlpToolz.get_sentences(input)
49
+ end
50
+ end
51
+
52
+ desc 'parsing text'
53
+ arg_name 'Describe arguments to parse here'
54
+ command :parse do |c|
55
+ c.desc 'file input'
56
+ c.arg_name '<path/to/file>'
57
+ c.flag [:f,:file]
58
+ c.action do |global_options,options,args|
59
+ input = get_in(options[:f] || args.first)
60
+ get_out NlpToolz.parse_text(input)
61
+ end
62
+ end
63
+
64
+ desc 'pos tagging of text'
65
+ arg_name 'Describe arguments to tag here'
66
+ command :tag do |c|
67
+ c.desc 'file input'
68
+ c.arg_name '<path/to/file>'
69
+ c.flag [:f,:file]
70
+ c.action do |global_options,options,args|
71
+ input = get_in(options[:f] || args.first)
72
+ get_out NlpToolz.tag_text(input)
73
+ end
74
+ end
75
+
76
+ desc 'tokenizing text'
77
+ arg_name 'Describe arguments to token here'
78
+ command :token do |c|
79
+ c.desc 'file input'
80
+ c.arg_name '<path/to/file>'
81
+ c.flag [:f,:file]
82
+ c.action do |global_options,options,args|
83
+ input = get_in(options[:f] || args.first)
84
+ get_out NlpToolz.tokenize_text(input)
85
+ end
86
+ end
87
+
88
+ on_error do |exception|
89
+ true
90
+ end
91
+
92
+ exit run(ARGV)
@@ -0,0 +1,36 @@
1
+ module Lang
2
+
3
+ include UrlHandler
4
+ # get language of input
5
+ def get_language(text = nil)
6
+ environment = ENV['ENV_NAME'] || 'development'
7
+ # ToDo 2013-03-14: respect environment
8
+ case environment
9
+ when 'development'
10
+ # development -> local
11
+ # uri = build_url("localhost", 9292, "/langid", nil)
12
+ uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
13
+ when 'production'
14
+ # production
15
+ uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
16
+ end
17
+
18
+ if @input
19
+ asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
20
+ elsif text
21
+ asv_response = post_data(URI.escape(text),uri,{'Content-type'=>'text/plain;charset=utf-8'})
22
+ end
23
+ response = MultiJson.load(asv_response.body)
24
+
25
+ response["lang"]
26
+ end
27
+
28
+ # ToDo 2013-02-26: make different lang identifier available
29
+ def alternative_langs lang
30
+ langs = {
31
+ en: [:eng, :english],
32
+ de: [:ger, :german]
33
+ }.each.collect{|x| x.flatten}
34
+ end
35
+
36
+ end
@@ -0,0 +1,20 @@
1
+ # coding: utf-8
2
+
3
+ class String
4
+ # ToDo: check abbr against list of ..
5
+ def clean_up
6
+ foo = self.encode('UTF-8', :invalid => :replace, :undef => :replace)
7
+ bar = foo.gsub(/[\p{Pi}\p{Pf}"'„“‘’“”«»‹›]/,'') # quotation marks
8
+ .gsub(/\b\/\b/,' ')
9
+ .gsub(/(\p{Ps})(.)/,'\1 \2') # left braces
10
+ .gsub(/(.)(\p{Pe})/,'\1 \2') # right braces
11
+ .gsub(/([\w]{3,})([\.])/,'\1 \2') # abbreviation?
12
+ .gsub(/(.)([,;:!?]+)/,'\1 \2') # punctuation
13
+ bar
14
+ end
15
+
16
+ def basename
17
+ self.split("/").last
18
+ end
19
+ end
20
+
@@ -0,0 +1,18 @@
1
+ require 'tempfile'
2
+
3
+ module TmpFile
4
+ module_function
5
+
6
+ def make_tmp_file_from text = nil
7
+ tmp_file = ::Tempfile.new('tmp.txt')
8
+ tmp_file.write text unless text.nil?
9
+ tmp_file.rewind
10
+ tmp_file
11
+ end
12
+
13
+ def delete_and_unlink_tmp_file tmp_file
14
+ tmp_file.close
15
+ tmp_file.unlink
16
+ end
17
+
18
+ end
@@ -0,0 +1,26 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+
4
+ module UrlHandler
5
+ module ClassMethods
6
+ end
7
+
8
+ # instance methods
9
+ def build_url(host, port, path, query)
10
+ return URI::HTTP.build({:host => host, :path => path, :query => query}) if port.nil?
11
+ return URI::HTTP.build({:host => host, :port => port, :path => path, :query => query}) unless port.nil?
12
+ end
13
+
14
+
15
+ def post_data(content,uri,content_type)
16
+ post = Net::HTTP::Post.new(uri.request_uri,content_type)
17
+ post.body = content.force_encoding("utf-8")
18
+ uri_response = Net::HTTP.start(uri.host,uri.port) {|http| http.request(post)}
19
+
20
+ uri_response
21
+ end
22
+
23
+ def self.included(receiver)
24
+ receiver.extend ClassMethods
25
+ end
26
+ end
@@ -0,0 +1,22 @@
1
+ module NlpToolz
2
+ MODELS = File.join(File.dirname(__FILE__), '..', '..', "models")
3
+ JARS = File.join(File.dirname(__FILE__), '..', '..', "jars")
4
+
5
+ CLASS_PATH = [
6
+ File.join(JARS, "jwnl-1.3.3.jar"),
7
+ File.join(JARS, "opennlp-tools-1.5.3.jar"),
8
+ File.join(JARS, "opennlp-maxent-3.0.3.jar")
9
+ ].join(":")
10
+
11
+ Rjb::load(CLASS_PATH,['-Xmx4096m','-Djava.awt.headless=true'])
12
+ # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
13
+ # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseConcMarkSweepGC','-Djava.awt.headless=true'])
14
+ # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseSerialGC','-Djava.awt.headless=true'])
15
+ end
16
+
17
+ # simple example benchmarks, pos tagging 862 pharses:
18
+ # /wo extra options -> 656s
19
+ # /w ParallelGC -> 657s
20
+ # /w ConcMarkSweepGC -> 659s
21
+ # /w SerialGC -> 668s
22
+ # see: [Java GC tuning](http://www.oracle.com/technetwork/java/javase/gc-tuning-6-140523.html)
@@ -0,0 +1,146 @@
1
+ # coding: utf-8
2
+ # @author: LeFnord
3
+ # @email: pscholz.le@gmail.com
4
+ # @date: 2012-12-10
5
+
6
+ module NlpToolz
7
+
8
+ class Parser
9
+
10
+ include Lang
11
+ include TmpFile
12
+
13
+ # load java classes
14
+ FileInputStream = Rjb::import('java.io.FileInputStream')
15
+
16
+ attr_reader :parsed
17
+ attr_accessor :input, :lang, :model, :model_name, :parse_hash
18
+
19
+ def initialize(input, lang = nil)
20
+ @input = input
21
+ @lang = lang || get_language
22
+ @model_name = "#{@lang}-sm5.gr"
23
+ get_model
24
+ end
25
+
26
+ def parse_text
27
+ parsed = nil
28
+ if self.has_model?
29
+ jar = "#{JARS}/BerkeleyParser-1.7.jar"
30
+ in_file = make_tmp_file_from @input.clean_up
31
+ out_file = make_tmp_file_from
32
+ `java -Xmx4g -jar #{jar} -gr #{@model} -inputFile #{in_file.path} -outputFile #{out_file.path} -tokenize -maxLength 500`.chomp
33
+ @parsed = File.open(out_file).gets(nil).chomp
34
+
35
+ parse_output_to_hash
36
+
37
+ delete_and_unlink_tmp_file in_file
38
+ delete_and_unlink_tmp_file out_file
39
+ end
40
+ end
41
+
42
+ def has_model?
43
+ @model
44
+ end
45
+
46
+ def layer(level = nil)
47
+ @first_layer
48
+ end
49
+
50
+ def hash
51
+ @parse_hash
52
+ end
53
+
54
+ private
55
+
56
+ # helper for ...
57
+ # initialize
58
+ def get_model
59
+ model_file = "#{MODELS}/parser/#{@model_name}"
60
+ if File.exists?(model_file)
61
+ @model = model_file
62
+ else
63
+ @model = false
64
+ end
65
+ end
66
+
67
+ # convert: #tree -> #hash
68
+ def parse_output_to_hash
69
+ parsed = split_parse_tree(self.parsed)
70
+ nodes = create_leafs(parsed)
71
+ @parse_hash = make_hash_hash(nodes)
72
+
73
+ @parse_hash
74
+ end
75
+
76
+ # helper for parsing to hash
77
+ ::Leaf = Struct.new(:tag, :token)
78
+ ::Node = Struct.new(:tag, :parent, :childs)
79
+
80
+ # 1. split
81
+ def split_parse_tree(parsed)
82
+ bar = parsed.gsub("))", ") )").gsub("))", ") )")
83
+ .gsub("(", "{")
84
+ .gsub(")", "}")
85
+
86
+ bar.split
87
+ end
88
+
89
+ # 2. merge tags and tokens, create leafs
90
+ def create_leafs(parsed)
91
+ @first_layer = {tags: [],tokens: []}
92
+ leafs = {}
93
+ foo = []
94
+ parsed.each_with_index do |part,i|
95
+ if part =~ /\{([\w\-]+|\$\p{P}|\p{P})/ && parsed[i+1] =~ /([\p{L}\p{N}\-\.]+|\p{P})\}/
96
+ tag = part.gsub("{","")
97
+ token = parsed[i+1].gsub("}","")
98
+ @first_layer[:tags] << tag
99
+ @first_layer[:tokens] << token
100
+
101
+ leaf = Leaf.new(tag.to_sym,token)
102
+
103
+ if foo[foo.length-1].is_a?(Hash)
104
+ foo[foo.length-1] = [foo[foo.length-1], leaf]
105
+ elsif foo[foo.length-1].is_a?(Array)
106
+ foo[foo.length-1] << leaf
107
+ else
108
+ foo << leaf
109
+ end
110
+ elsif part !~ /([\p{L}\p{N}\-]+|\p{P})\}/
111
+ if part =~ /(\{)(.+)/
112
+ foo << "{#{part.gsub("{","")}"
113
+ else
114
+ foo << "#{part}"
115
+ end
116
+ end
117
+ end
118
+
119
+ foo
120
+ end
121
+
122
+ def make_hash_hash(nodes)
123
+ tmp = catch(:done) {
124
+ nodes.reverse.each_with_index do |node,i|
125
+ if node =~ /\{(\w+)/
126
+ key = node.match(/\{(\w+)/)[1].to_sym
127
+ part = []
128
+ nodes[-i-1..-1].each_with_index do |x,ii|
129
+ if x == "}"
130
+ part = {key => nodes[-i..-i+ii-2]}
131
+ throw :done, [nodes[0..-i-2],part,nodes[-i+ii..-1]].flatten
132
+ end
133
+ end
134
+ end
135
+ end
136
+ }
137
+ if tmp.length > 3
138
+ make_hash_hash(tmp)
139
+ else
140
+ tmp[1]
141
+ end
142
+ end
143
+
144
+ end # class Parser
145
+
146
+ end # module NlpToolz
@@ -0,0 +1,77 @@
1
+ # coding: utf-8
2
+ # @author: LeFnord
3
+ # @email: pscholz.le@gmail.com
4
+ # @date: 2012-10-24
5
+
6
+ # ToDo 2012-10-24: add train capabilities
7
+ module NlpToolz
8
+
9
+ class PosTags
10
+
11
+ include Lang
12
+
13
+ # load java classes
14
+ FileInputStream = Rjb::import('java.io.FileInputStream')
15
+ POSModel = Rjb::import('opennlp.tools.postag.POSModel')
16
+ POSTaggerME = Rjb::import('opennlp.tools.postag.POSTaggerME')
17
+
18
+ attr_accessor :input, :lang, :model, :model_name, :tokenized
19
+
20
+ def initialize(input, lang = nil)
21
+ @input = input
22
+ @lang = lang || get_language
23
+ @model_name = "#{@lang}-pos-maxent.bin"
24
+ get_model
25
+ end
26
+
27
+ def get_pos_tags
28
+ if self.has_model?
29
+ @tokenized = tokenize_it @tagger.tag(@input.clean_up)
30
+ end
31
+ end
32
+
33
+ def tokens
34
+ @tokenized[:tokens]
35
+ end
36
+
37
+ def tags
38
+ @tokenized[:tags]
39
+ end
40
+
41
+ def has_model?
42
+ @model
43
+ end
44
+
45
+ private
46
+
47
+ def get_model
48
+ model_file = "#{MODELS}/pos/#{@model_name}"
49
+ if File.exists?(model_file)
50
+ @model = POSModel.new(FileInputStream.new(model_file))
51
+ @tagger = POSTaggerME.new(@model)
52
+ else
53
+ @model = false
54
+ end
55
+ end
56
+
57
+ # ToDo 2012-11-28: only a workaround upto the opennlp tokenizer is implemented
58
+ def tokenize_it stream
59
+ foo = {tokens: [], tags: []}
60
+ stream.split.each do |token|
61
+ splitter = token.split("/")
62
+ if splitter.length == 2
63
+ foo[:tokens] << splitter.first
64
+ foo[:tags] << splitter.last
65
+ else
66
+ splitter[0..-2].each do |splits|
67
+ foo[:tokens] << splits
68
+ foo[:tags] << splitter.last
69
+ end
70
+ end
71
+ end
72
+ foo
73
+ end
74
+
75
+ end # class PosTags
76
+
77
+ end # module NlpToolz
@@ -0,0 +1,50 @@
1
+ # coding: utf-8
2
+ # @author: LeFnord
3
+ # @email: pscholz.le@gmail.com
4
+ # @date: 2012-10-23
5
+
6
+ # ToDo 2012-10-24: add train capabilities
7
+ module NlpToolz
8
+
9
+ class Sentences
10
+
11
+ include Lang
12
+
13
+ # load java classes
14
+ FileInputStream = Rjb::import('java.io.FileInputStream')
15
+ SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
16
+ SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
17
+
18
+ attr_accessor :input, :lang, :model, :model_name, :sentences
19
+
20
+ def initialize(input,lang = nil)
21
+ @input = input
22
+ @lang = lang || get_language
23
+ @model_name = "#{@lang}-sent.bin"
24
+ get_model
25
+ end
26
+
27
+ def split_into_sentences
28
+ @sentences = @sentence_detector.sentDetect(@input).to_a
29
+ end
30
+
31
+ def has_model?
32
+ @model
33
+ end
34
+
35
+ private
36
+
37
+ def get_model
38
+ model_file = "#{MODELS}/sent/#{@model_name}"
39
+ if File.exists?(model_file)
40
+ @model = SentenceModel.new(FileInputStream.new(model_file))
41
+ @sentence_detector = SentenceDetectorME.new(@model)
42
+ else
43
+ @model = false
44
+ # raise 'file not found'
45
+ end
46
+ end
47
+
48
+ end # class Sentences
49
+
50
+ end # module NlpToolz
@@ -0,0 +1,48 @@
1
+ # coding: utf-8
2
+ # @author: LeFnord
3
+ # @email: pscholz.le@gmail.com
4
+ # @date: 2012-11-30
5
+
6
+ module NlpToolz
7
+
8
+ class Tokens
9
+
10
+ include Lang
11
+
12
+ # load java classes
13
+ FileInputStream = Rjb::import('java.io.FileInputStream')
14
+ TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
15
+ TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')
16
+
17
+ attr_accessor :input, :lang, :model, :model_name, :tokens
18
+
19
+ def initialize(input, lang = nil)
20
+ @input = input
21
+ @lang = lang || get_language
22
+ @model_name = "#{@lang}-token.bin"
23
+ get_model
24
+ end
25
+
26
+ def tokenize
27
+ @tokens = @tokenizer.tokenize(@input)
28
+ end
29
+
30
+ def has_model?
31
+ @model
32
+ end
33
+
34
+ private
35
+
36
+ def get_model
37
+ model_file = "#{MODELS}/token/#{@model_name}"
38
+ if File.exists?(model_file)
39
+ @model = TokenizerModel.new(FileInputStream.new(model_file))
40
+ @tokenizer = TokenizerME.new(@model)
41
+ else
42
+ @model = false
43
+ end
44
+ end
45
+
46
+ end # class Tokens
47
+
48
+ end # module NlpToolz
@@ -0,0 +1,8 @@
1
+ # coding: utf-8
2
+ # @author: LeFnord
3
+ # @email: pscholz.le@gmail.com
4
+ # @date: 2012-10-23
5
+
6
+ module NlpToolz
7
+ VERSION = "1.0.3"
8
+ end