nlp_toolz 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 71916455cffe07c8464fb8cc1543d7b8a2ea7205
4
+ data.tar.gz: bc30072b7d62770c3e202e0545137056fe5a6164
5
+ SHA512:
6
+ metadata.gz: 997d3fc4fb5d9c18546e1ea4c5c8acd19e61ef6979ece0d27cff540cea99c2ecae094fba16a4c3aa25dc05f1fe9282498c228a898b68b4271e493027663e0ba3
7
+ data.tar.gz: 42d5ea917f3febe6484a80ab085f0b41515540f841edc2de4b219d06456d7d331a750fb306095336918b4c82f4cd184d1dc6099cd4ff0fd51e2cb487adab9944
data/.gitignore ADDED
@@ -0,0 +1,28 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ .rvmrc
20
+
21
+ ToDo.task
22
+
23
+ teste.rb
24
+
25
+ .DS_Store
26
+ test-data/
27
+ jars/*
28
+ models/*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in nlp_toolz.gemspec
4
+ gemspec
5
+
6
+ # gem 'birch', git: 'git://github.com/louismullie/birch.git'
data/Guardfile ADDED
@@ -0,0 +1,13 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :bundler do
5
+ watch('Gemfile')
6
+ watch(/^.+\.gemspec/)
7
+ end
8
+
9
+ guard :rspec do
10
+ watch(%r{^spec/.+_spec\.rb$})
11
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
12
+ watch('spec/spec_helper.rb') { "spec" }
13
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 LeFnord
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # NlpToolz
2
+
3
+ Basic NLP tools, mostly based on [OpenNLP](http://opennlp.apache.org); at this time `sentence finder`, `tokenizer`, and `POS tagger` are implemented, plus the [Berkeley Parser](http://code.google.com/p/berkeleyparser/).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'nlp_toolz'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install nlp_toolz
18
+
19
+ Download the jars and model files from [Dropbox](https://www.dropbox.com/sh/1layyjgf5h0wwi3/s2SHAnfVhs) and unzip them into the gem folder.
20
+
21
+ ## Usage
22
+
23
+ see: [nlp_toolz.rb](https://github.com/LeFnord/nlp_toolz/blob/master/lib/nlp_toolz.rb) and specs for usage
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create new Pull Request
32
+
33
+ ## Comments
34
+
35
+ - removed Celluloid — handle concurrency in your app, where it is used
36
+ - check `load_jars` for JVM parameters
37
+
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env rake
# Build/test/doc task definitions for the nlp_toolz gem.
require "bundler/gem_tasks"
require "awesome_print"
require 'rspec/core'
require 'rspec/core/rake_task'
# `rake spec` — run the RSpec suite under spec/
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# running bare `rake` runs the specs
task :default => :spec

require 'yard'
# `rake yard` — generate API documentation
YARD::Rake::YardocTask.new

# pick up any additional task files shipped under lib/tasks
Dir["lib/tasks/**/*.rake"].sort.each { |ext| load ext }
data/bin/nlp_toolz ADDED
@@ -0,0 +1,92 @@
1
#!/usr/bin/env ruby
# CLI entry point for nlp_toolz, built on the GLI command-line framework.

require 'gli'
begin # XXX: Remove this begin/rescue before distributing your app
  require 'nlp_toolz'
rescue LoadError
  STDERR.puts "In development, you need to use `bundle exec bin/nlp_toolz` to run your app"
  STDERR.puts "At install-time, RubyGems will make sure lib, etc. are in the load path"
  STDERR.puts "Feel free to remove this message from bin/NlpToolz now"
  exit 64
end

# mix the GLI DSL (program_desc, command, flag, ...) into main
include GLI::App
14
+
15
+ # helper methods
16
+ def get_out(this)
17
+ ap this if $stdout.tty?
18
+ $stdout.puts this unless $stdout.tty?
19
+ end
20
+
21
# Resolves a CLI argument: if it names an existing regular file, returns
# that file's content (via get_file); otherwise the argument itself is
# returned and treated as literal input text.
def get_in(input_arg)
  # File.exist? — File.exists? was deprecated for years and removed in Ruby 3.2
  if File.exist?(input_arg) && !File.directory?(input_arg)
    get_file(input_arg)
  else
    input_arg
  end
end
28
+
29
# Reads a whole file and returns its content tagged as UTF-8.
# Returns "" for an empty file (IO#gets(nil) yields nil at EOF).
# Fixes two defects in the original: the File handle was never closed
# (no block form), and the `"" if file.nil?` line was a no-op, so empty
# files actually returned nil.
def get_file(name)
  content = File.open(name) { |file| file.gets(nil) }
  return "" if content.nil?
  content.force_encoding("utf-8")
end
34
+
35
+
36
program_desc 'running basic NLP tasks'

version NlpToolz::VERSION

# `nlp_toolz sent [-f FILE | TEXT]` — sentence boundary detection
desc 'sentence detection'
arg_name 'Describe arguments to sent here'
command :sent do |c|
  c.desc 'file input'
  c.arg_name '<path/to/file>'
  c.flag [:f,:file]
  c.action do |global_options,options,args|
    # -f wins over a positional argument; get_in resolves file vs. literal text
    input = get_in(options[:f] || args.first)
    get_out NlpToolz.get_sentences(input)
  end
end

# `nlp_toolz parse [-f FILE | TEXT]` — constituency parsing (Berkeley Parser)
desc 'parsing text'
arg_name 'Describe arguments to parse here'
command :parse do |c|
  c.desc 'file input'
  c.arg_name '<path/to/file>'
  c.flag [:f,:file]
  c.action do |global_options,options,args|
    input = get_in(options[:f] || args.first)
    get_out NlpToolz.parse_text(input)
  end
end

# `nlp_toolz tag [-f FILE | TEXT]` — part-of-speech tagging (OpenNLP)
desc 'pos tagging of text'
arg_name 'Describe arguments to tag here'
command :tag do |c|
  c.desc 'file input'
  c.arg_name '<path/to/file>'
  c.flag [:f,:file]
  c.action do |global_options,options,args|
    input = get_in(options[:f] || args.first)
    get_out NlpToolz.tag_text(input)
  end
end

# `nlp_toolz token [-f FILE | TEXT]` — tokenization (OpenNLP)
desc 'tokenizing text'
arg_name 'Describe arguments to token here'
command :token do |c|
  c.desc 'file input'
  c.arg_name '<path/to/file>'
  c.flag [:f,:file]
  c.action do |global_options,options,args|
    input = get_in(options[:f] || args.first)
    get_out NlpToolz.tokenize_text(input)
  end
end

# returning true lets GLI apply its standard error reporting for every exception
on_error do |exception|
  true
end

exit run(ARGV)
@@ -0,0 +1,36 @@
1
module Lang

  include UrlHandler
  # get language of input
  # POSTs the text to an external langid web service and returns the
  # detected language code from its JSON response (e.g. "en", "de").
  # Prefers @input (set by the including class) over the +text+ argument.
  # NOTE(review): URI.escape was removed in Ruby 3.0 — needs replacing
  # (e.g. URI.encode_www_form_component or CGI.escape; confirm server side).
  # NOTE(review): when neither @input nor text is given, asv_response is
  # nil and MultiJson.load will raise NoMethodError.
  def get_language(text = nil)
    environment = ENV['ENV_NAME'] || 'development'
    # ToDo 2013-03-14: respect environment
    case environment
    when 'development'
      # development -> local
      # uri = build_url("localhost", 9292, "/langid", nil)
      uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
    when 'production'
      # production
      uri = build_url("arielle.tm.informatik.uni-leipzig.de", 55700, "/langid", nil)
    end

    if @input
      asv_response = post_data(URI.escape(@input),uri,{'Content-type'=>'text/plain;charset=utf-8'})
    elsif text
      asv_response = post_data(URI.escape(text),uri,{'Content-type'=>'text/plain;charset=utf-8'})
    end
    response = MultiJson.load(asv_response.body)

    response["lang"]
  end

  # ToDo 2013-02-26: make different lang identifier available
  # NOTE(review): the +lang+ parameter is unused and the collect over the
  # hash returns flattened [key, aliases...] pairs — looks like an
  # unfinished stub; verify intent before relying on the return value.
  def alternative_langs lang
    langs = {
      en: [:eng, :english],
      de: [:ger, :german]
    }.each.collect{|x| x.flatten}
  end

end
@@ -0,0 +1,20 @@
1
# coding: utf-8

class String
  # Normalizes a string for the NLP tools: strips quotation marks and
  # inserts spaces around braces, possible abbreviation periods, and
  # sentence punctuation, after forcing a valid UTF-8 encoding.
  # ToDo: check abbr against list of ..
  def clean_up
    sanitized = self.encode('UTF-8', :invalid => :replace, :undef => :replace)
    sanitized = sanitized.gsub(/[\p{Pi}\p{Pf}"'„“‘’“”«»‹›]/,'') # strip quotation marks
    sanitized = sanitized.gsub(/\b\/\b/,' ')                    # slash between words -> space
    sanitized = sanitized.gsub(/(\p{Ps})(.)/,'\1 \2')           # space after left braces
    sanitized = sanitized.gsub(/(.)(\p{Pe})/,'\1 \2')           # space before right braces
    sanitized = sanitized.gsub(/([\w]{3,})([\.])/,'\1 \2')      # abbreviation?
    sanitized.gsub(/(.)([,;:!?]+)/,'\1 \2')                     # space before punctuation
  end

  # Last "/"-separated segment of the string (nil for an empty string).
  def basename
    split("/").last
  end
end
20
+
@@ -0,0 +1,18 @@
1
+ require 'tempfile'
2
+
3
module TmpFile
  module_function

  # Creates a Tempfile (basename 'tmp.txt'), optionally pre-filled with
  # +text+, rewound so callers can read it from the start.
  def make_tmp_file_from text = nil
    file = ::Tempfile.new('tmp.txt')
    file.write(text) unless text.nil?
    file.rewind
    file
  end

  # Closes the Tempfile and unlinks it from disk.
  def delete_and_unlink_tmp_file tmp_file
    tmp_file.close
    tmp_file.unlink
  end

end
@@ -0,0 +1,26 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+
4
module UrlHandler
  module ClassMethods
  end

  # instance methods

  # Builds an http:// URI from its parts; the port is left out of the
  # URI when nil (standard port 80).
  def build_url(host, port, path, query)
    parts = {:host => host, :path => path, :query => query}
    parts[:port] = port unless port.nil?
    URI::HTTP.build(parts)
  end


  # POSTs +content+ to +uri+ with the given headers and returns the raw
  # Net::HTTP response. Note: force_encoding retags +content+ in place.
  def post_data(content,uri,content_type)
    request = Net::HTTP::Post.new(uri.request_uri,content_type)
    request.body = content.force_encoding("utf-8")
    Net::HTTP.start(uri.host,uri.port) { |http| http.request(request) }
  end

  # Including classes also gain the (currently empty) ClassMethods.
  def self.included(receiver)
    receiver.extend ClassMethods
  end
end
@@ -0,0 +1,22 @@
1
module NlpToolz
  # Absolute paths to the bundled model and jar directories, resolved
  # relative to this file (gem root is two levels up).
  MODELS = File.join(File.dirname(__FILE__), '..', '..', "models")
  JARS = File.join(File.dirname(__FILE__), '..', '..', "jars")

  # Java classpath for Rjb: JWNL (WordNet) plus the OpenNLP tools/maxent jars.
  # NOTE(review): ":" as separator is Unix-only — File::PATH_SEPARATOR would
  # also cover Windows; confirm target platforms.
  CLASS_PATH = [
    File.join(JARS, "jwnl-1.3.3.jar"),
    File.join(JARS, "opennlp-tools-1.5.3.jar"),
    File.join(JARS, "opennlp-maxent-3.0.3.jar")
  ].join(":")

  # Boots the in-process JVM once, at require time: 4 GB heap, headless.
  # Alternative GC flags below were benchmarked (see notes after the module).
  Rjb::load(CLASS_PATH,['-Xmx4096m','-Djava.awt.headless=true'])
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseParallelGC','-XX:+UseParallelOldGC','-Djava.awt.headless=true'])
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseConcMarkSweepGC','-Djava.awt.headless=true'])
  # Rjb::load(CLASS_PATH,['-Xmx4096m','-XX:+UseSerialGC','-Djava.awt.headless=true'])
end

# simple example benchmarks, pos tagging 862 phrases:
# /wo extra options -> 656s
# /w ParallelGC -> 657s
# /w ConcMarkSweepGC -> 659s
# /w SerialGC -> 668s
# see: [Java GC tuning](http://www.oracle.com/technetwork/java/javase/gc-tuning-6-140523.html)
@@ -0,0 +1,146 @@
1
# coding: utf-8
# @author: LeFnord
# @email: pscholz.le@gmail.com
# @date: 2012-12-10

module NlpToolz

  # Constituency parsing via the external Berkeley Parser jar: the input
  # is cleaned, written to a temp file, parsed by a shelled-out `java`
  # process, and the bracketed tree output is folded into a nested Hash.
  class Parser

    include Lang
    include TmpFile

    # load java classes
    FileInputStream = Rjb::import('java.io.FileInputStream')

    attr_reader :parsed
    attr_accessor :input, :lang, :model, :model_name, :parse_hash

    # input - text to parse; lang - language code, auto-detected via
    # Lang#get_language (network call) when not given.
    def initialize(input, lang = nil)
      @input = input
      @lang = lang || get_language
      @model_name = "#{@lang}-sm5.gr"
      get_model
    end

    # Runs the Berkeley Parser over @input, filling @parsed (raw tree
    # string) and @parse_hash. Returns nil when no grammar model exists.
    def parse_text
      parsed = nil # NOTE(review): dead local — the result lands in @parsed below
      if self.has_model?
        jar = "#{JARS}/BerkeleyParser-1.7.jar"
        in_file = make_tmp_file_from @input.clean_up
        out_file = make_tmp_file_from
        # shell out; -tokenize makes the jar tokenize its own input
        `java -Xmx4g -jar #{jar} -gr #{@model} -inputFile #{in_file.path} -outputFile #{out_file.path} -tokenize -maxLength 500`.chomp
        # NOTE(review): File.open without a block leaks the handle until GC
        @parsed = File.open(out_file).gets(nil).chomp

        parse_output_to_hash

        delete_and_unlink_tmp_file in_file
        delete_and_unlink_tmp_file out_file
      end
    end

    # Truthy (the model path) when a grammar was found, false otherwise.
    def has_model?
      @model
    end

    # Flat tag/token lists of the tree's terminal layer;
    # the +level+ argument is currently ignored.
    def layer(level = nil)
      @first_layer
    end

    # NOTE(review): overrides Object#hash with unrelated semantics —
    # confirm no code uses Parser instances as Hash keys or in Sets.
    def hash
      @parse_hash
    end

    private

    # helper for ...
    # initialize
    # Resolves MODELS/parser/<lang>-sm5.gr; sets @model to false when missing.
    # NOTE(review): File.exists? was removed in Ruby 3.2 — use File.exist?.
    def get_model
      model_file = "#{MODELS}/parser/#{@model_name}"
      if File.exists?(model_file)
        @model = model_file
      else
        @model = false
      end
    end

    # convert: #tree -> #hash
    # Pipeline: split the bracketed tree into tokens, merge tag/token
    # pairs into Leaf structs, then fold the list into nested Hashes.
    def parse_output_to_hash
      parsed = split_parse_tree(self.parsed)
      nodes = create_leafs(parsed)
      @parse_hash = make_hash_hash(nodes)

      @parse_hash
    end

    # helper for parsing to hash
    # NOTE(review): defined at top level (::), not under NlpToolz —
    # these leak into the global namespace on first class load.
    ::Leaf = Struct.new(:tag, :token)
    ::Node = Struct.new(:tag, :parent, :childs)

    # 1. split
    # Separates runs of closing parens, rewrites ( ) to { }, and splits
    # the tree string on whitespace. The double "))"-gsub handles runs of
    # three-plus closes left by the first pass.
    def split_parse_tree(parsed)
      bar = parsed.gsub("))", ") )").gsub("))", ") )")
                  .gsub("(", "{")
                  .gsub(")", "}")

      bar.split
    end

    # 2. merge tags and tokens, create leafs
    # Pairs "{TAG" markers with the following "token}" into Leaf structs,
    # also collecting flat tag/token lists into @first_layer. Consecutive
    # leafs are grouped into arrays; structural tokens pass through.
    def create_leafs(parsed)
      @first_layer = {tags: [],tokens: []}
      leafs = {}
      foo = []
      parsed.each_with_index do |part,i|
        # "{TAG" immediately followed by "token}" -> terminal node
        if part =~ /\{([\w\-]+|\$\p{P}|\p{P})/ && parsed[i+1] =~ /([\p{L}\p{N}\-\.]+|\p{P})\}/
          tag = part.gsub("{","")
          token = parsed[i+1].gsub("}","")
          @first_layer[:tags] << tag
          @first_layer[:tokens] << token

          leaf = Leaf.new(tag.to_sym,token)

          # append to / start a sibling group of leafs
          if foo[foo.length-1].is_a?(Hash)
            foo[foo.length-1] = [foo[foo.length-1], leaf]
          elsif foo[foo.length-1].is_a?(Array)
            foo[foo.length-1] << leaf
          else
            foo << leaf
          end
        elsif part !~ /([\p{L}\p{N}\-]+|\p{P})\}/
          # structural tokens: opening "{LABEL" markers or bare "}"
          if part =~ /(\{)(.+)/
            foo << "{#{part.gsub("{","")}"
          else
            foo << "#{part}"
          end
        end
      end

      foo
    end

    # 3. fold — repeatedly collapses the innermost "{KEY ... }" span into
    # {KEY => [...]}, restarting via catch/throw after each collapse,
    # until only the root entry remains (returned as tmp[1]).
    def make_hash_hash(nodes)
      tmp = catch(:done) {
        nodes.reverse.each_with_index do |node,i|
          if node =~ /\{(\w+)/
            key = node.match(/\{(\w+)/)[1].to_sym
            part = []
            nodes[-i-1..-1].each_with_index do |x,ii|
              if x == "}"
                part = {key => nodes[-i..-i+ii-2]}
                throw :done, [nodes[0..-i-2],part,nodes[-i+ii..-1]].flatten
              end
            end
          end
        end
      }
      if tmp.length > 3
        make_hash_hash(tmp)
      else
        tmp[1]
      end
    end

  end # class Parser

end # module NlpToolz
@@ -0,0 +1,77 @@
1
# coding: utf-8
# @author: LeFnord
# @email: pscholz.le@gmail.com
# @date: 2012-10-24

# ToDo 2012-10-24: add train capabilities
module NlpToolz

  # Part-of-speech tagging via OpenNLP's maxent tagger (through Rjb).
  class PosTags

    include Lang

    # load java classes
    FileInputStream = Rjb::import('java.io.FileInputStream')
    POSModel = Rjb::import('opennlp.tools.postag.POSModel')
    POSTaggerME = Rjb::import('opennlp.tools.postag.POSTaggerME')

    attr_accessor :input, :lang, :model, :model_name, :tokenized

    # input - text to tag; lang - language code, auto-detected via
    # Lang#get_language (network call) when not given.
    def initialize(input, lang = nil)
      @input = input
      @lang = lang || get_language
      @model_name = "#{@lang}-pos-maxent.bin"
      get_model
    end

    # Tags the cleaned input and splits the tagger's "token/TAG ..." output
    # into parallel arrays in @tokenized; nil when no model is available.
    def get_pos_tags
      if self.has_model?
        @tokenized = tokenize_it @tagger.tag(@input.clean_up)
      end
    end

    # Token list of the last get_pos_tags run (raises if not run yet).
    def tokens
      @tokenized[:tokens]
    end

    # Tag list of the last get_pos_tags run, parallel to #tokens.
    def tags
      @tokenized[:tags]
    end

    # Truthy (the POSModel) when a tagger model was loaded, false otherwise.
    def has_model?
      @model
    end

    private

    # Loads MODELS/pos/<lang>-pos-maxent.bin and builds the tagger;
    # sets @model to false when the file is missing.
    # NOTE(review): File.exists? was removed in Ruby 3.2 — use File.exist?.
    def get_model
      model_file = "#{MODELS}/pos/#{@model_name}"
      if File.exists?(model_file)
        @model = POSModel.new(FileInputStream.new(model_file))
        @tagger = POSTaggerME.new(@model)
      else
        @model = false
      end
    end

    # ToDo 2012-11-28: only a workaround upto the opennlp tokenizer is implemented
    # Splits each "token/TAG" pair; when a token itself contains "/"
    # (3+ segments), the last segment is taken as the tag and each of the
    # remaining segments is emitted as a token carrying that tag.
    def tokenize_it stream
      foo = {tokens: [], tags: []}
      stream.split.each do |token|
        splitter = token.split("/")
        if splitter.length == 2
          foo[:tokens] << splitter.first
          foo[:tags] << splitter.last
        else
          splitter[0..-2].each do |splits|
            foo[:tokens] << splits
            foo[:tags] << splitter.last
          end
        end
      end
      foo
    end

  end # class PosTags

end # module NlpToolz
@@ -0,0 +1,50 @@
1
# coding: utf-8
# @author: LeFnord
# @email: pscholz.le@gmail.com
# @date: 2012-10-23

# ToDo 2012-10-24: add train capabilities
module NlpToolz

  # Sentence boundary detection via OpenNLP's SentenceDetectorME (through Rjb).
  class Sentences

    include Lang

    # load java classes
    FileInputStream = Rjb::import('java.io.FileInputStream')
    SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
    SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')

    attr_accessor :input, :lang, :model, :model_name, :sentences

    # input - text to split; lang - language code, auto-detected via
    # Lang#get_language (network call) when not given.
    def initialize(input,lang = nil)
      @input = input
      @lang = lang || get_language
      @model_name = "#{@lang}-sent.bin"
      get_model
    end

    # Splits @input into sentences; stores and returns them as a Ruby Array.
    # NOTE(review): raises NoMethodError on nil @sentence_detector when no
    # model was found — callers should check has_model? first.
    def split_into_sentences
      @sentences = @sentence_detector.sentDetect(@input).to_a
    end

    # Truthy (the SentenceModel) when a model was loaded, false otherwise.
    def has_model?
      @model
    end

    private

    # Loads MODELS/sent/<lang>-sent.bin and builds the detector;
    # sets @model to false when the file is missing.
    # NOTE(review): File.exists? was removed in Ruby 3.2 — use File.exist?.
    def get_model
      model_file = "#{MODELS}/sent/#{@model_name}"
      if File.exists?(model_file)
        @model = SentenceModel.new(FileInputStream.new(model_file))
        @sentence_detector = SentenceDetectorME.new(@model)
      else
        @model = false
        # raise 'file not found'
      end
    end

  end # class Sentences

end # module NlpToolz
@@ -0,0 +1,48 @@
1
# coding: utf-8
# @author: LeFnord
# @email: pscholz.le@gmail.com
# @date: 2012-11-30

module NlpToolz

  # Tokenization via OpenNLP's TokenizerME (through Rjb).
  class Tokens

    include Lang

    # load java classes
    FileInputStream = Rjb::import('java.io.FileInputStream')
    TokenizerModel = Rjb::import('opennlp.tools.tokenize.TokenizerModel')
    TokenizerME = Rjb::import('opennlp.tools.tokenize.TokenizerME')

    attr_accessor :input, :lang, :model, :model_name, :tokens

    # input - text to tokenize; lang - language code, auto-detected via
    # Lang#get_language (network call) when not given.
    def initialize(input, lang = nil)
      @input = input
      @lang = lang || get_language
      @model_name = "#{@lang}-token.bin"
      get_model
    end

    # Tokenizes @input, storing and returning the tokens.
    # NOTE(review): raises NoMethodError on nil @tokenizer when no model
    # was found — callers should check has_model? first.
    def tokenize
      @tokens = @tokenizer.tokenize(@input)
    end

    # Truthy (the TokenizerModel) when a model was loaded, false otherwise.
    def has_model?
      @model
    end

    private

    # Loads MODELS/token/<lang>-token.bin and builds the tokenizer;
    # sets @model to false when the file is missing.
    # NOTE(review): File.exists? was removed in Ruby 3.2 — use File.exist?.
    def get_model
      model_file = "#{MODELS}/token/#{@model_name}"
      if File.exists?(model_file)
        @model = TokenizerModel.new(FileInputStream.new(model_file))
        @tokenizer = TokenizerME.new(@model)
      else
        @model = false
      end
    end

  end # Class Tokens

end # module NlpToolz
@@ -0,0 +1,8 @@
1
# coding: utf-8
# @author: LeFnord
# @email: pscholz.le@gmail.com
# @date: 2012-10-23

module NlpToolz
  # Gem version; keep in sync with releases.
  # Frozen so the constant cannot be mutated in place (Ruby best practice
  # for string constants).
  VERSION = "1.0.3".freeze
end