nlp-pure 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5bbc92e65c96837a6e53f28248e15d48a35abe1
4
- data.tar.gz: 79f767942ba8723a3f5f6eb04ea0ec4498e02591
3
+ metadata.gz: 040f2a0f166664553334b5b5c4bd8119b1033fd0
4
+ data.tar.gz: a414246804bbba1dd57a4ecd70e82d57eb92bbc5
5
5
  SHA512:
6
- metadata.gz: 9e00458afc1dadd851ea8ccd4e312ec19c6b775b455ebf6d5e599480dde8f333704a8c9601f62970bdefe26ffea7bd509bb3cd52314775d642865587d94e7214
7
- data.tar.gz: 72abbd773eb915a11f76526b9bdfb37cbcd05c258aab45fd3c7e18c9fc1591c84c97cc3f99641ecee20ad27ea47d10daf9e35128d572d95aeb17aeda809e8a93
6
+ metadata.gz: ffe7ee7f37f3b724f74ce7ddaf318fd38d5addcdcaa844f1abafe81bbdd7d5c20dfe53f9122a65155173491775e17819940ee2e7faac523403d184e508a60827
7
+ data.tar.gz: 7d69a2dfc54b6cf1e008b35d7fc040697f3e7f9decfab4613564814a4cc74e3affb7ba52eae4efddb751b2f6df6be2cfef617929d3cbbbe8691d62307cdd19b3
data/.gitignore CHANGED
@@ -29,3 +29,5 @@ Gemfile.lock
29
29
 
30
30
  *.swp
31
31
  .DS_Store
32
+ TODO.md
33
+ ideas
@@ -2,25 +2,41 @@ AllCops:
2
2
  Exclude:
3
3
  - Guardfile
4
4
  - 'vendor/**/*'
5
+ - 'test/**/*'
5
6
 
6
- RunRailsCops: false
7
+ Rails:
8
+ Enabled: false
7
9
 
10
+ # NLP is hard
11
+ AbcSize:
12
+ Max: 22.5
8
13
  AlignParameters:
9
14
  Enabled: false
10
15
  ClassAndModuleChildren:
11
16
  Enabled: false
17
+ # NLP is hard
18
+ CyclomaticComplexity:
19
+ Max: 12
12
20
  Encoding:
13
21
  Enabled: false
14
- LineLength:
15
- Max: 200
16
22
  HashSyntax:
17
23
  Exclude:
18
24
  - Rakefile
19
25
  - 'spec/**/*'
20
26
  - 'test/**/*'
27
+ LineLength:
28
+ Max: 200
29
+ # NLP is hard
30
+ MethodLength:
31
+ Max: 15
32
+ PerceivedComplexity:
33
+ Max: 12
21
34
 
22
35
  # Don't fail on whitespace between method names and arguments
23
- Style/SingleSpaceBeforeFirstArg:
36
+ Style/SpaceBeforeFirstArg:
37
+ Enabled: false
38
+
39
+ Style/SymbolArray:
24
40
  Enabled: false
25
41
 
26
42
  # Indent private/protected/public as deep as method definitions
@@ -1,14 +1,19 @@
1
1
  language: ruby
2
2
  sudo: false
3
3
  cache: bundler
4
+ dist: trusty
5
+ addons:
6
+ apt:
7
+ packages:
8
+ - haveged
4
9
  # NOTE: these run in order
5
10
  rvm:
6
11
  - jruby
7
- - rbx-2
8
- - 2.0.0
9
- - 2.1
10
- - 2.2
12
+ - rbx-3.73
13
+ - 2.2.7
14
+ - 2.3.4
15
+ - 2.4.1
11
16
  matrix:
12
17
  allow_failures:
13
- - rvm: rbx-2
18
+ - rvm: rbx-3.73
14
19
  - rvm: jruby
@@ -4,6 +4,8 @@ Officially leaving a non-semantic versioning scheme.
4
4
 
5
5
  Added benchmarking test.
6
6
 
7
+ Replaced RSpec with Minitest
8
+
7
9
  # 0.0.5
8
10
 
9
11
  Fixed bug in `NlpPure::Segmenting::DefaultWord` where leading ellipses could produce extra segmented words.
data/Gemfile CHANGED
@@ -1,22 +1,20 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
3
 
4
+ gem 'rubocop'
5
+
4
6
  platforms :rbx do
5
- gem 'rubysl', '~> 2.0' # if using anything in the ruby standard library
6
7
  gem 'psych' # if using yaml
7
- gem 'minitest' # if using minitest
8
8
  gem 'rubinius-developer_tools' # if using any of coverage, debugger, profiler
9
+ gem 'rubysl', '~> 2.0' # if using anything in the ruby standard library
9
10
  end
10
11
 
11
12
  platforms :jruby do
12
- gem 'jruby-openssl'
13
13
  gem 'activerecord-jdbcsqlite3-adapter'
14
+ gem 'jruby-openssl'
14
15
  end
15
16
 
16
17
  group :test do
17
- gem 'rake'
18
- gem 'rspec', '~> 3.0.0'
19
- gem 'guard-rspec'
20
- gem 'guard-rubocop'
21
18
  gem 'coveralls', require: false
19
+ gem 'rake'
22
20
  end
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # NLP Pure
2
2
 
3
- [![Code Climate](https://codeclimate.com/github/parhamr/nlp-pure/badges/gpa.svg)](https://codeclimate.com/github/parhamr/nlp-pure)
3
+ [![Gem Version](https://badge.fury.io/rb/nlp-pure.svg)](https://badge.fury.io/rb/nlp-pure) [![Code Climate](https://codeclimate.com/github/parhamr/nlp-pure/badges/gpa.svg)](https://codeclimate.com/github/parhamr/nlp-pure)
4
4
  [![Build Status](https://travis-ci.org/parhamr/nlp-pure.svg?branch=master)](https://travis-ci.org/parhamr/nlp-pure)
5
- [![Coverage Status](https://coveralls.io/repos/parhamr/nlp-pure/badge.png?branch=master)](https://coveralls.io/r/parhamr/nlp-pure?branch=master)
5
+ [![Coverage Status](https://coveralls.io/repos/github/parhamr/nlp-pure/badge.svg?branch=master)](https://coveralls.io/github/parhamr/nlp-pure?branch=master)
6
6
 
7
7
  Natural language processing algorithms implemented in pure Ruby with minimal dependencies.
8
8
 
@@ -14,7 +14,8 @@ This project aims to provide functionality similar to [Treat](https://github.com
14
14
 
15
15
  * [Installation](#installation)
16
16
  * [Usage](#usage)
17
- ** [Word Segmentation](#word-segmentation)
17
+ * [Word Segmentation](#word-segmentation)
18
+ * [Sentence Segmentation](#sentence-segmentation)
18
19
  * [Supported Ruby Versions](#supported-ruby-versions)
19
20
  * [Versioning](#versioning)
20
21
  * [Contributing](CONTRIBUTING.md)
@@ -61,6 +62,20 @@ irb(main):005:0> NlpPure::Segmenting::DefaultWord.parse "Mary had a little lamb,
61
62
  => ["Mary", "had", "a", "little", "lamb,", "His", "fleece", "was", "white", "as", "snow,", "And", "everywhere", "that", "Mary", "went,", "The", "lamb", "was", "sure", "to", "go."]
62
63
  ```
63
64
 
65
+ ### Sentence Segmentation
66
+
67
+ ```
68
+ M017-PDX:nlp-pure rp0616$ bundle exec irb
69
+ irb(main):001:0> require 'nlp_pure/segmenting/default_sentence'
70
+ => true
71
+ irb(main):002:0> NlpPure::Segmenting::DefaultSentence.parse 'The U.S.A. is a member of NATO.'
72
+ => ["The U.S.A. is a member of NATO."]
73
+ irb(main):003:0> NlpPure::Segmenting::DefaultSentence.parse 'Mary had a little lamb. The lamb’s fleece was white as snow. Everywhere that Mary went, the lamb was sure to go.'
74
+ => ["Mary had a little lamb.", "The lamb’s fleece was white as snow.", "Everywhere that Mary went, the lamb was sure to go."]
75
+ irb(main):004:0> NlpPure::Segmenting::DefaultSentence.parse 'I am excited! Today is Friday.'
76
+ => ["I am excited!", "Today is Friday."]
77
+ ```
78
+
64
79
 
65
80
  ## Supported Ruby Versions
66
81
 
@@ -101,29 +116,29 @@ spec.add_dependency 'nlp-pure', '~> 0.1'
101
116
  [Search “nlp” at ruby-toolbox.com](https://www.ruby-toolbox.com/search?q=nlp)
102
117
 
103
118
  * APIs
104
- ** [alchemy_api](https://github.com/dbalatero/alchemy_api)
105
- ** [napi-ruby](https://github.com/Maluuba/napi-ruby)
106
- ** [poliqarpr](https://github.com/apohllo/poliqarpr)
107
- ** [wlapi](https://github.com/arbox/wlapi)
119
+ * [alchemy_api](https://github.com/dbalatero/alchemy_api)
120
+ * [napi-ruby](https://github.com/Maluuba/napi-ruby)
121
+ * [poliqarpr](https://github.com/apohllo/poliqarpr)
122
+ * [wlapi](https://github.com/arbox/wlapi)
108
123
  * Bindings and Toolkits
109
- ** [open-nlp](https://github.com/louismullie/open-nlp)
110
- ** [stanford-core-nlp](https://github.com/louismullie/stanford-core-nlp)
111
- ** [treat](https://github.com/louismullie/treat)
124
+ * [open-nlp](https://github.com/louismullie/open-nlp)
125
+ * [stanford-core-nlp](https://github.com/louismullie/stanford-core-nlp)
126
+ * [treat](https://github.com/louismullie/treat)
112
127
  * Classification
113
- ** [linnaeus](https://github.com/djcp/linnaeus)
114
- ** [maxent_string_classifier](https://github.com/mccraigmccraig/maxent_string_classifier)
128
+ * [linnaeus](https://github.com/djcp/linnaeus)
129
+ * [maxent_string_classifier](https://github.com/mccraigmccraig/maxent_string_classifier)
115
130
  * N-Grams
116
- ** [ruby-ngram](https://github.com/tkellen/ruby-ngram)
131
+ * [ruby-ngram](https://github.com/tkellen/ruby-ngram)
117
132
  * Specific Languages
118
- ** Polish
119
- *** [nlp](https://github.com/knife/nlp)
133
+ * Polish
134
+ * [nlp](https://github.com/knife/nlp)
120
135
  * Stopwords
121
- ** [clarifier](https://github.com/meducation/clarifier)
122
- ** [stopwords](https://github.com/brez/stopwords)
123
- ** [stopwords-filter](https://github.com/brenes/stopwords-filter)
136
+ * [clarifier](https://github.com/meducation/clarifier)
137
+ * [stopwords](https://github.com/brez/stopwords)
138
+ * [stopwords-filter](https://github.com/brenes/stopwords-filter)
124
139
  * Tokenization
125
- ** [rseg](https://rubygems.org/gems/rseg)
126
- ** [Tokenizer](https://github.com/arbox/tokenizer)
140
+ * [rseg](https://rubygems.org/gems/rseg)
141
+ * [Tokenizer](https://github.com/arbox/tokenizer)
127
142
  * Word Counters
128
- ** [words_counted](https://github.com/abitdodgy/words_counted)
143
+ * [words_counted](https://github.com/abitdodgy/words_counted)
129
144
 
data/Rakefile CHANGED
@@ -2,17 +2,26 @@ require 'bundler'
2
2
  Bundler::GemHelper.install_tasks
3
3
 
4
4
  begin
5
- require 'rspec/core/rake_task'
6
- require 'rubocop/rake_task'
7
- RSpec::Core::RakeTask.new(:spec)
5
+ task :coverage do
6
+ require 'coveralls'
7
+ Coveralls.wear!
8
+ require 'minitest'
9
+ end
8
10
 
9
- task :rubocop do
10
- require 'rubocop'
11
- cli = RuboCop::CLI.new
12
- cli.run
11
+ require 'rake/testtask'
12
+ Rake::TestTask.new(:test) do |t|
13
+ require_relative 'test/test_helper'
14
+ t.verbose = true
15
+ t.pattern = 'test/**/*_test.rb'
16
+ end
17
+
18
+ require 'rubocop/rake_task'
19
+ RuboCop::RakeTask.new(:rubocop) do |task|
20
+ # don't abort rake on failure
21
+ task.fail_on_error = false
13
22
  end
14
23
 
15
- task :default => [:spec, :rubocop]
24
+ task default: [:coverage, :test, :rubocop]
16
25
  rescue LoadError => e
17
26
  STDERR << "#{e.class}: #{e.message} (#{e.backtrace[0]})"
18
27
  end
@@ -1,13 +1,14 @@
1
1
  # encoding: utf-8
2
+
2
3
  require 'nlp_pure/version'
3
- fail "NLP Pure #{NlpPure::VERSION} does not support Ruby 1.9." if RUBY_PLATFORM != 'java' && RUBY_VERSION < '2.0.0'
4
+ raise "NLP Pure #{NlpPure::VERSION} does not support Ruby 1.9." if RUBY_PLATFORM != 'java' && RUBY_VERSION < '2.0.0'
4
5
 
5
6
  #
6
7
  module NlpPure
7
- NAME = 'NlpPure'
8
- LICENSE = 'See LICENSE for details.'
8
+ NAME = 'NlpPure'.freeze
9
+ LICENSE = 'See LICENSE for details.'.freeze
9
10
 
10
- DEFAULTS = {}
11
+ DEFAULTS = {}.freeze
11
12
 
12
13
  def self.logger
13
14
  NlpPure::Logging.logger
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  require 'time'
3
4
  require 'logger'
4
5
 
@@ -8,7 +9,7 @@ module NlpPure
8
9
  module Logging
9
10
  #
10
11
  class Pretty < Logger::Formatter
11
- def call(severity, time, program_name, message)
12
+ def call(severity, time, _program_name, message)
12
13
  "#{time.utc.iso8601(2)} #{::Process.pid} #{severity}: #{message}\n"
13
14
  end
14
15
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  #
3
4
  module NlpPure
4
5
  # Namespace for segmenting implementations
@@ -0,0 +1,94 @@
1
+ # encoding: utf-8
2
+
3
module NlpPure
  module Segmenting
    # Naive, rule-based sentence segmenter.
    #
    # The text is first split on terminal punctuation and line breaks; the
    # resulting fragments are then rejoined whenever the following fragment
    # looks like a continuation (punctuation, an abbreviation, or a lowercase
    # or numeric continuation).
    #
    # SEE ALSO: Unsupervised Multilingual Sentence Boundary Detection. Kiss, Strunk; 2006.
    # NOTE: this fails on some proper nouns with abbreviations (e.g. business names)
    #       and fails on single-linebreak headings
    module DefaultSentence
      DEFAULT_OPTIONS = {
        # punctuation or linebreaks; the capture group makes String#split keep
        # the separators as their own fragments so they can be rejoined later
        split: /([.?!]|\n{2,}|\r\n)+/,
        # array of arrays; [0] should be regexp, [1] should be replacement
        # NOTE: minor performance risk in letting this array grow long
        gsub: [
          # period ellipses need reconstruction
          [/\.{3,}/, '…']
        ],
        naive_sentence_word_count: 3,
        segment_boundary: '. '
      }.freeze

      module_function

      # NOTE: exposed as a method for easy mock/stub
      #
      # @return [Hash] the options used for cleaning and splitting
      def options
        DEFAULT_OPTIONS
      end

      # Segments the first argument into sentences.
      #
      # @param args [Array] only the first element is used; it is coerced with #to_s
      # @return [Array<String>, nil] the sentences, or nil when called without arguments
      def parse(*args)
        return nil if args.nil? || args.empty?
        # naive split
        segments = clean_input(args[0]).split(options.fetch(:split, nil))
        # skip rejoin if one segment
        return segments if segments.length == 1
        returning = rejoin_segment_fragments(segments).compact
        STDERR << "#{returning.inspect}\n" if ENV['DEBUG']
        returning
      end

      # Applies the configured :gsub replacements and strips surrounding
      # whitespace.
      #
      # The replacements are non-destructive: for a String argument `to_s`
      # returns the receiver itself, so an in-place `gsub!` would modify the
      # caller's string (and raise when that string is frozen).
      #
      # @param text [#to_s, nil] raw input
      # @return [String] a new, cleaned string
      def clean_input(text = nil)
        # perform replacements to work around the limitations of the splitting regexp
        cleaned = options.fetch(:gsub, []).reduce(text.to_s) do |input, gsub_pair|
          input.gsub(gsub_pair[0], gsub_pair[1])
        end
        # NOTE: leading whitespace is problematic; ref #12
        cleaned.strip
      end

      # Walks the fragments produced by the naive split and glues together
      # those that belong to the same sentence.
      #
      # @param segments [Array<String>] fragments; consumed (emptied) by this method
      # @return [Array<String>] the reassembled sentences
      def rejoin_segment_fragments(segments)
        reassociated_segments = []
        # take all segments
        while (segment = segments.shift)
          STDERR << "#{segment.inspect}\n" if ENV['DEBUG']
          # join segments if needed
          reassociated_segments << handle_special_fragments(segments, segment)
        end
        reassociated_segments
      end

      # rejoin leading punctuation, abbreviation, and numbers
      #
      # @param segments [Array<String>] remaining fragments; joined ones are shifted off
      # @param segment [String] the fragment currently being built
      # @return [String] the fragment plus every continuation, stripped
      def handle_special_fragments(segments, segment)
        # NOTE: always index zero because we're shifting
        while next_segment_appears_included?(segments[0])
          STDERR << "\t\t<< #{segments[0].inspect}\n" if ENV['DEBUG']
          segment = "#{segment}#{segments.shift}"
        end
        segment.strip
      end

      # Decides whether a fragment continues the sentence before it.
      #
      # @param segment [String, nil] the next fragment, or nil when none remain
      # @return [Boolean] true when the fragment should be appended
      def next_segment_appears_included?(segment)
        return false unless segment
        # NOTE: the logic is expanded for logging reasons (despite style violation)
        if segment[0] =~ options.fetch(:split, nil)
          STDERR << "\t! leading punctuation detected\n" if ENV['DEBUG']
        elsif segment[0] =~ /^\w/
          STDERR << "\t! assuming abbreviation\n" if ENV['DEBUG']
        elsif segment =~ /^\s[a-z0-9]/
          STDERR << "\t! greedily grabbing lowercase\n" if ENV['DEBUG']
        elsif segment =~ /^\d/
          # NOTE: `^` anchors at every line start, so this can still match a
          # later line of a multi-line fragment
          STDERR << "\t! leading numeral detected\n" if ENV['DEBUG']
        else
          STDERR << "\t\tx\n" if ENV['DEBUG']
          return false
        end
        true
      end

      # @param segments [Array] fragments, possibly containing nil
      # @return [Array] the fragments without nil entries
      def cleanup_segmenting(segments)
        segments.compact
      end
    end
  end
end
@@ -13,15 +13,15 @@ module NlpPure
13
13
  gsub: [
14
14
  # ellipses at the start of a string are problematic; ref #12
15
15
  [/^\s?(…|\.{3,})/, ' ']
16
- ]
16
+ ],
17
+ segment_boundary: ' '
17
18
  }.freeze
18
19
 
19
20
  module_function
20
21
 
21
22
  def parse(*args)
22
- unless args.nil? || args.empty?
23
- clean_input(args[0]).split(options.fetch(:split, nil))
24
- end
23
+ return nil if args.nil? || args.empty?
24
+ clean_input(args[0]).split(options.fetch(:split, nil))
25
25
  end
26
26
 
27
27
  def clean_input(text = nil)
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
+
2
3
  #
3
4
  module NlpPure
4
- VERSION = '0.1.0'
5
+ VERSION = '0.2.0'.freeze
5
6
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  require File.expand_path('../lib/nlp_pure/version', __FILE__)
3
4
 
4
5
  Gem::Specification.new do |gem|
@@ -15,6 +16,6 @@ Gem::Specification.new do |gem|
15
16
  gem.require_paths = ['lib']
16
17
  gem.version = NlpPure::VERSION
17
18
  gem.add_development_dependency 'rake', '~> 10.4'
18
- gem.add_development_dependency 'rspec', '~> 3.0'
19
+ gem.add_development_dependency 'minitest', '~> 5.5'
19
20
  gem.add_development_dependency 'coveralls', '~> 0.7'
20
21
  end
@@ -0,0 +1,85 @@
1
# Small English-language fixture corpus shared by the segmenting tests.
# Each method returns one sample text exercising a particular punctuation
# or layout case.
module CorpusEnglishSimple
  def english_simple_sentence
    'The quick brown fox jumps over the lazy dog.'
  end

  def english_hyphen_sentence
    'The New York-based company hired new staff.'
  end

  # em dashes without surrounding spaces
  def english_dash_sentence
    'The quick brown fox—full of energy—jumps over the lazy dog.'
  end

  # em dashes with surrounding spaces
  def english_spaced_dash_sentence
    'The quick brown fox — full of energy — jumps over the lazy dog.'
  end

  # double hyphens standing in for dashes
  def english_twohyphen_sentence
    'The quick brown fox--full of energy--jumps over the lazy dog.'
  end

  # single-character ellipsis
  def english_ellipsis_sentence
    'The quick brown fox…jumps over the lazy dog.'
  end

  def english_spaced_ellipsis_sentence
    'The quick brown fox … jumps over the lazy dog.'
  end

  # ellipsis written as three periods
  def english_period_ellipsis_sentence
    'The quick brown fox...jumps over the lazy dog.'
  end

  def english_leading_ellipsis_sentence
    ' … the quick brown fox jumps over the lazy dog.'
  end

  def english_leading_period_ellipsis_sentence
    ' ... the quick brown fox jumps over the lazy dog.'
  end

  def english_trailing_ellipsis_sentence
    'The quick brown fox jumps over the lazy dog … '
  end

  def english_spaced_period_ellipsis_sentence
    'The quick brown fox ... jumps over the lazy dog.'
  end

  def english_abbreviation_sentence
    'The U.S.A. is a member of NATO.'
  end

  def english_simple_paragraph
    'Mary had a little lamb. The lamb’s fleece was white as snow. Everywhere that Mary went, the lamb was sure to go.'
  end

  # single line breaks between verse lines
  def english_simple_line_breaks
    "Mary had a little lamb,\nHis fleece was white as snow,\nAnd everywhere that Mary went,\nThe lamb was sure to go."
  end

  def english_financial_sentence
    'AMERICAN INDUSTRY INC. sold 1,550,000 common shares at $21.75 each yesterday, according to lead underwriter A.B. Hammersmith & Co.'
  end

  def english_short_sentence
    'Go!'
  end

  def english_exclamations
    'I am excited! Today is Friday.'
  end
  # The fixture was originally published under a misspelled name; keep it
  # available so existing callers continue to work.
  alias english_excalamations english_exclamations

  def english_short_question
    'You?'
  end

  def english_leading_question
    'On which side of the road do you drive? In North America we drive on the right side.'
  end

  # heading lines separated by CRLF, followed by one long sentence
  def english_usa_constitution_preamble
    "United States of America 1789 (rev. 1992)\r\nPREAMBLE\r\nWe the People of the United States, in Order to form a more perfect Union, establish Justice, insure domestic Tranquility, provide for the common defense, promote the general Welfare, and secure the Blessings of Liberty to ourselves and our Posterity, do ordain and establish this Constitution for the United States of America."
  end
end
@@ -0,0 +1,123 @@
1
+ # encoding: utf-8
2
+
3
+ require 'minitest/autorun'
4
+ require 'nlp_pure/segmenting/default_sentence'
5
+ require_relative '../../../fixtures/corpus_english_simple'
6
+
7
+ #
8
# Tests for NlpPure::Segmenting::DefaultSentence against the simple English
# fixture corpus. Written as plain Minitest::Test methods; assert_equal is
# always called as (expected, actual) so failure messages read correctly.
class TestNlpPureSegmentingDefaultSentence < Minitest::Test
  include ::CorpusEnglishSimple

  # [module]

  def test_module_is_defined
    assert_equal 'constant', defined?(NlpPure::Segmenting::DefaultSentence)
  end

  # .parse with `nil` argument

  def test_parse_returns_array
    assert_equal [], sentences_in(nil)
  end

  # .parse without arguments

  def test_parse_returns_nil
    assert_nil NlpPure::Segmenting::DefaultSentence.parse
  end

  # .parse with strings

  def test_parse_returns_sentence_array
    assert_instance_of Array, sentences_in(english_simple_sentence)
  end

  def test_parse_correctly_counts_sentences
    assert_equal 1, sentences_in(english_simple_sentence).length
  end

  def test_parse_correctly_sentence_segments_hyphens
    assert_equal 1, sentences_in(english_hyphen_sentence).length
  end

  def test_parse_correctly_sentence_segments_doublehyphen_dashes
    assert_equal 1, sentences_in(english_twohyphen_sentence).length
  end

  def test_parse_correctly_sentence_segments_dashes
    assert_equal 1, sentences_in(english_dash_sentence).length
  end

  def test_parse_correctly_sentence_segments_spaced_dashes
    assert_equal 1, sentences_in(english_spaced_dash_sentence).length
  end

  def test_parse_correctly_sentence_segments_ellipses
    assert_equal 1, sentences_in(english_ellipsis_sentence).length
  end

  def test_parse_correctly_sentence_segments_spaced_ellipses
    assert_equal 1, sentences_in(english_spaced_ellipsis_sentence).length
  end

  def test_parse_correctly_sentence_segments_periodellipses
    assert_equal 1, sentences_in(english_period_ellipsis_sentence).length
  end

  def test_parse_correctly_sentence_segments_spaced_periodellipses
    assert_equal 1, sentences_in(english_spaced_period_ellipsis_sentence).length
  end

  def test_parse_correctly_sentence_segments_leading_spaced_periodellipses
    assert_equal 1, sentences_in(english_leading_ellipsis_sentence).length
  end

  def test_parse_correctly_sentence_segments_trailing_spaced_periodellipses
    assert_equal 1, sentences_in(english_trailing_ellipsis_sentence).length
  end

  def test_parse_does_not_sentence_segment_abbreviations
    assert_equal ['The U.S.A. is a member of NATO.'], sentences_in(english_abbreviation_sentence)
    assert_equal 1, sentences_in(english_abbreviation_sentence).length
  end

  def test_parse_does_not_sentence_segment_financial_jargon
    skip('FIXME: financial jargon is hard')
    assert_equal [english_financial_sentence], sentences_in(english_financial_sentence)
    assert_equal 1, sentences_in(english_financial_sentence).length
  end

  def test_parse_correctly_sentence_segments_longer_texts
    expected = [
      'Mary had a little lamb.',
      'The lamb’s fleece was white as snow.',
      'Everywhere that Mary went, the lamb was sure to go.'
    ]
    assert_equal expected, sentences_in(english_simple_paragraph)
    assert_equal 3, sentences_in(english_simple_paragraph).length
  end

  def test_parse_correctly_sentence_segments_line_breaks
    assert_equal 1, sentences_in(english_simple_line_breaks).length
  end

  def test_parse_correctly_sentence_segments_exclamations
    assert_equal ['I am excited!', 'Today is Friday.'], sentences_in(english_excalamations)
    assert_equal 2, sentences_in(english_excalamations).length
  end

  def test_parse_correctly_sentence_segments_questions
    expected = [
      'On which side of the road do you drive?',
      'In North America we drive on the right side.'
    ]
    assert_equal expected, sentences_in(english_leading_question)
    assert_equal 2, sentences_in(english_leading_question).length
  end

  def test_parse_correctly_sentence_usa_constitution_preamble
    assert_equal 'United States of America 1789 (rev. 1992)',
                 sentences_in(english_usa_constitution_preamble)[0]
    assert_equal 3, sentences_in(english_usa_constitution_preamble).length
  end

  private

  # Shorthand for the call under test.
  def sentences_in(text)
    NlpPure::Segmenting::DefaultSentence.parse(text)
  end
end
@@ -0,0 +1,106 @@
1
+ # encoding: utf-8
2
+
3
+ require 'minitest/autorun'
4
+ require 'nlp_pure/segmenting/default_word'
5
+ require_relative '../../../fixtures/corpus_english_simple'
6
+
7
+ #
8
# Tests for NlpPure::Segmenting::DefaultWord against the simple English
# fixture corpus. Written as plain Minitest::Test methods; assert_equal is
# always called as (expected, actual) so failure messages read correctly.
class TestNlpPureSegmentingDefaultWord < Minitest::Test
  include ::CorpusEnglishSimple

  # [module]

  def test_module_is_defined
    assert_equal 'constant', defined?(NlpPure::Segmenting::DefaultWord)
  end

  # .parse with `nil` argument

  def test_parse_returns_array
    assert_equal [], words_in(nil)
  end

  # .parse without arguments

  def test_parse_returns_nil
    assert_nil NlpPure::Segmenting::DefaultWord.parse
  end

  # .parse with strings

  def test_parse_returns_word_array
    assert_instance_of Array, words_in(english_simple_sentence)
  end

  def test_parse_correctly_counts_words
    assert_equal 9, words_in(english_simple_sentence).length
  end

  def test_parse_does_not_mangle_english_simple_sentence
    assert_equal english_simple_sentence, rejoined(english_simple_sentence)
  end

  def test_parse_correctly_word_segments_hyphens
    assert_equal 8, words_in(english_hyphen_sentence).length
  end

  def test_parse_does_not_mangle_english_hyphen_sentence
    skip('FIXME')
    # exercises the hyphen fixture (previously a copy of the simple-sentence check)
    assert_equal english_hyphen_sentence, rejoined(english_hyphen_sentence)
  end

  def test_parse_correctly_word_segments_doublehyphen_dashes
    assert_equal 12, words_in(english_twohyphen_sentence).length
  end

  def test_parse_does_not_mangle_english_twohyphen_sentence
    skip('FIXME')
    assert_equal english_twohyphen_sentence, rejoined(english_twohyphen_sentence)
  end

  def test_parse_correctly_word_segments_dashes
    assert_equal 12, words_in(english_dash_sentence).length
  end

  def test_parse_correctly_word_segments_spaced_dashes
    assert_equal 12, words_in(english_spaced_dash_sentence).length
  end

  def test_parse_correctly_word_segments_ellipses
    assert_equal 9, words_in(english_ellipsis_sentence).length
  end

  def test_parse_correctly_word_segments_spaced_ellipses
    assert_equal 9, words_in(english_spaced_ellipsis_sentence).length
  end

  def test_parse_correctly_word_segments_periodellipses
    assert_equal 9, words_in(english_period_ellipsis_sentence).length
  end

  def test_parse_correctly_word_segments_spaced_periodellipses
    assert_equal 9, words_in(english_spaced_period_ellipsis_sentence).length
  end

  def test_parse_correctly_word_segments_leading_spaced_periodellipses
    assert_equal 9, words_in(english_leading_ellipsis_sentence).length
  end

  def test_parse_correctly_word_segments_trailing_spaced_periodellipses
    assert_equal 9, words_in(english_trailing_ellipsis_sentence).length
  end

  def test_parse_does_not_word_segment_abbreviations
    assert_equal 7, words_in(english_abbreviation_sentence).length
  end

  def test_parse_correctly_word_segments_longer_texts
    assert_equal 22, words_in(english_simple_paragraph).length
  end

  def test_parse_correctly_word_segments_line_breaks
    assert_equal 22, words_in(english_simple_line_breaks).length
  end

  private

  # Shorthand for the call under test.
  def words_in(text)
    NlpPure::Segmenting::DefaultWord.parse(text)
  end

  # Parses the text and joins the words back with the configured boundary.
  def rejoined(text)
    words_in(text).join(NlpPure::Segmenting::DefaultWord.options[:segment_boundary])
  end
end
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+
3
+ require 'minitest/autorun'
4
+ require 'nlp_pure/segmenting'
5
+
6
+ #
7
# Smoke test: the NlpPure::Segmenting namespace is loadable.
class TestNlpPureSegmenting < Minitest::Test
  def test_module_is_defined
    # assert_equal takes the expected value first
    assert_equal 'constant', defined?(NlpPure::Segmenting)
  end
end
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+
3
+ require 'minitest/autorun'
4
+ require 'nlp_pure'
5
+
6
+ #
7
# Smoke test: the top-level NlpPure namespace is loadable.
class TestNlpPure < Minitest::Test
  def test_module_is_defined
    # assert_equal takes the expected value first
    assert_equal 'constant', defined?(NlpPure)
  end
end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'fixtures/corpus_english_simple'
4
+
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp-pure
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Parham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-16 00:00:00.000000000 Z
11
+ date: 2017-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '10.4'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '10.4'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rspec
28
+ name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
- version: '3.0'
33
+ version: '5.5'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
- version: '3.0'
40
+ version: '5.5'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0.7'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.7'
55
55
  description: Natural language processing algorithms implemented in pure Ruby with
@@ -60,27 +60,29 @@ executables: []
60
60
  extensions: []
61
61
  extra_rdoc_files: []
62
62
  files:
63
- - ".gitignore"
64
- - ".rspec"
65
- - ".rubocop.yml"
66
- - ".travis.yml"
63
+ - .gitignore
64
+ - .rspec
65
+ - .rubocop.yml
66
+ - .travis.yml
67
67
  - CHANGELOG.md
68
68
  - CONTRIBUTING.md
69
69
  - Gemfile
70
- - Guardfile
71
70
  - LICENSE
72
71
  - README.md
73
72
  - Rakefile
74
73
  - lib/nlp_pure.rb
75
74
  - lib/nlp_pure/logging.rb
76
75
  - lib/nlp_pure/segmenting.rb
76
+ - lib/nlp_pure/segmenting/default_sentence.rb
77
77
  - lib/nlp_pure/segmenting/default_word.rb
78
78
  - lib/nlp_pure/version.rb
79
79
  - nlp-pure.gemspec
80
- - spec/lib/nlp_pure_spec.rb
81
- - spec/lib/segmenting/default_word_spec.rb
82
- - spec/lib/segmenting_spec.rb
83
- - spec/spec_helper.rb
80
+ - test/fixtures/corpus_english_simple.rb
81
+ - test/lib/nlp_pure/segmenting/default_sentence_test.rb
82
+ - test/lib/nlp_pure/segmenting/default_word_test.rb
83
+ - test/lib/nlp_pure/segmenting_test.rb
84
+ - test/lib/nlp_pure_test.rb
85
+ - test/test_helper.rb
84
86
  homepage: https://github.com/parhamr/nlp-pure
85
87
  licenses:
86
88
  - MIT
@@ -91,24 +93,19 @@ require_paths:
91
93
  - lib
92
94
  required_ruby_version: !ruby/object:Gem::Requirement
93
95
  requirements:
94
- - - ">="
96
+ - - '>='
95
97
  - !ruby/object:Gem::Version
96
98
  version: '0'
97
99
  required_rubygems_version: !ruby/object:Gem::Requirement
98
100
  requirements:
99
- - - ">="
101
+ - - '>='
100
102
  - !ruby/object:Gem::Version
101
103
  version: '0'
102
104
  requirements: []
103
105
  rubyforge_project:
104
- rubygems_version: 2.2.2
106
+ rubygems_version: 2.0.14.1
105
107
  signing_key:
106
108
  specification_version: 4
107
109
  summary: Natural language processing algorithms implemented in pure Ruby with minimal
108
110
  dependencies
109
- test_files:
110
- - spec/lib/nlp_pure_spec.rb
111
- - spec/lib/segmenting/default_word_spec.rb
112
- - spec/lib/segmenting_spec.rb
113
- - spec/spec_helper.rb
114
- has_rdoc:
111
+ test_files: []
data/Guardfile DELETED
@@ -1,20 +0,0 @@
1
- guard :rspec, cmd: "bundle exec rspec", all_on_start: false, all_after_pass: false, failed_mode: :none do
2
- require "guard/rspec/dsl"
3
- dsl = Guard::RSpec::Dsl.new(self)
4
-
5
- # RSpec files
6
- rspec = dsl.rspec
7
- watch(rspec.spec_helper) { rspec.spec_dir }
8
- watch(rspec.spec_support) { rspec.spec_dir }
9
- watch(rspec.spec_files)
10
-
11
- # Ruby files
12
- ruby = dsl.ruby
13
- dsl.watch_spec_files_for(ruby.lib_files)
14
-
15
- end
16
-
17
- guard :rubocop, all_on_start: false, keep_failed: false do
18
- watch(%r{.+\.rb$})
19
- watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
20
- end
@@ -1,11 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
- require 'nlp_pure'
4
-
5
- describe NlpPure do
6
- describe '[module]' do
7
- it 'is defined' do
8
- expect(defined?(NlpPure)).to be_truthy
9
- end
10
- end
11
- end
@@ -1,207 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
- require 'nlp_pure/segmenting/default_word'
4
-
5
- describe NlpPure::Segmenting::DefaultWord do
6
- describe '[module]' do
7
- it 'is defined' do
8
- expect(defined?(NlpPure::Segmenting::DefaultWord)).to be_truthy
9
- end
10
-
11
- describe '::DEFAULT_OPTIONS' do
12
- it 'is Hash' do
13
- expect(NlpPure::Segmenting::DefaultWord::DEFAULT_OPTIONS).to be_a Hash
14
- end
15
- end
16
- end
17
-
18
- describe '.parse' do
19
- context 'English' do
20
- let(:english_simple_sentence) { 'The quick brown fox jumps over the lazy dog.' }
21
- let(:english_hyphen_sentence) { 'The New York-based company hired new staff.' }
22
- let(:english_dash_sentence) { 'The quick brown fox—full of energy—jumps over the lazy dog.' }
23
- let(:english_spaced_dash_sentence) { 'The quick brown fox — full of energy — jumps over the lazy dog.' }
24
- let(:english_twohyphen_sentence) { 'The quick brown fox--full of energy--jumps over the lazy dog.' }
25
- let(:english_ellipsis_sentence) { 'The quick brown fox…jumps over the lazy dog.' }
26
- let(:english_spaced_ellipsis_sentence) { 'The quick brown fox … jumps over the lazy dog.' }
27
- let(:english_period_ellipsis_sentence) { 'The quick brown fox...jumps over the lazy dog.' }
28
- let(:english_leading_ellipsis_sentence) { ' … the quick brown fox jumps over the lazy dog.' }
29
- let(:english_leading_period_ellipsis_sentence) { ' ... the quick brown fox jumps over the lazy dog.' }
30
- let(:english_trailing_ellipsis_sentence) { 'The quick brown fox jumps over the lazy dog … ' }
31
- let(:english_spaced_period_ellipsis_sentence) { 'The quick brown fox ... jumps over the lazy dog.' }
32
- let(:english_abbreviation_sentence) { 'The U.S.A. is a member of NATO.' }
33
- let(:english_simple_paragraph) { 'Mary had a little lamb. The lamb’s fleece was white as snow. Everywhere that Mary went, the lamb was sure to go.' }
34
- let(:english_simple_line_breaks) { "Mary had a little lamb,\nHis fleece was white as snow,\nAnd everywhere that Mary went,\nThe lamb was sure to go." }
35
-
36
- context '(with nil options)' do
37
- before do
38
- expect(NlpPure::Segmenting::DefaultWord).to receive(:options).at_least(:once).and_return(nil)
39
- end
40
-
41
- it 'raises NoMethodError' do
42
- expect { NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence) }.to raise_error
43
- end
44
- end
45
-
46
- context '(with blank options)' do
47
- before do
48
- expect(NlpPure::Segmenting::DefaultWord).to receive(:options).at_least(:once).and_return({})
49
- end
50
-
51
- it 'returns Array' do
52
- expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence)).to be_an Array
53
- end
54
- end
55
-
56
- context '(with default options)' do
57
- context 'with `nil` argument' do
58
- it 'does not raise error' do
59
- expect { NlpPure::Segmenting::DefaultWord.parse(nil) }.to_not raise_error
60
- end
61
-
62
- it 'returns Array' do
63
- expect(NlpPure::Segmenting::DefaultWord.parse(nil)).to be_an Array
64
- end
65
- end
66
-
67
- context 'without arguments' do
68
- it 'does not raise error' do
69
- expect { NlpPure::Segmenting::DefaultWord.parse }.to_not raise_error
70
- end
71
-
72
- it 'returns nil' do
73
- expect(NlpPure::Segmenting::DefaultWord.parse).to eq nil
74
- end
75
- end
76
-
77
- it 'returns Array' do
78
- expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence)).to be_an Array
79
- end
80
-
81
- it 'correctly counts words' do
82
- expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence).length).to eq(9)
83
- end
84
-
85
- it 'correctly segments hyphens' do
86
- expect(NlpPure::Segmenting::DefaultWord.parse(english_hyphen_sentence).length).to eq(8)
87
- end
88
-
89
- it 'correctly segments double-hyphen dashes' do
90
- expect(NlpPure::Segmenting::DefaultWord.parse(english_twohyphen_sentence).length).to eq(12)
91
- end
92
-
93
- it 'correctly segments dashes' do
94
- expect(NlpPure::Segmenting::DefaultWord.parse(english_spaced_dash_sentence).length).to eq(12)
95
- end
96
-
97
- it 'correctly segments spaced dashes' do
98
- expect(NlpPure::Segmenting::DefaultWord.parse(english_dash_sentence).length).to eq(12)
99
- end
100
-
101
- it 'correctly segments ellipses' do
102
- expect(NlpPure::Segmenting::DefaultWord.parse(english_ellipsis_sentence).length).to eq(9)
103
- end
104
-
105
- it 'correctly segments spaced ellipses' do
106
- expect(NlpPure::Segmenting::DefaultWord.parse(english_spaced_ellipsis_sentence).length).to eq(9)
107
- end
108
-
109
- it 'correctly segments period-ellipses' do
110
- expect(NlpPure::Segmenting::DefaultWord.parse(english_period_ellipsis_sentence).length).to eq(9)
111
- end
112
-
113
- it 'correctly segments spaced period-ellipses' do
114
- expect(NlpPure::Segmenting::DefaultWord.parse(english_spaced_period_ellipsis_sentence).length).to eq(9)
115
- end
116
-
117
- it 'correctly segments with leading, spaced ellipses' do
118
- expect(NlpPure::Segmenting::DefaultWord.parse(english_leading_ellipsis_sentence).length).to eq(9)
119
- end
120
-
121
- it 'correctly segments with trailing, spaced ellipses' do
122
- expect(NlpPure::Segmenting::DefaultWord.parse(english_trailing_ellipsis_sentence).length).to eq(9)
123
- end
124
-
125
- it 'does not segment abbreviations' do
126
- expect(NlpPure::Segmenting::DefaultWord.parse(english_abbreviation_sentence).length).to eq(7)
127
- end
128
-
129
- it 'correctly counts with longer texts' do
130
- expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_paragraph).length).to eq(22)
131
- end
132
-
133
- it 'correctly counts with line breaks' do
134
- expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_line_breaks).length).to eq(22)
135
- end
136
-
137
- context 'benchmarking' do
138
- before do
139
- require 'benchmark'
140
- end
141
-
142
- it 'takes time', benchmarking: true do
143
- expect(
144
- Benchmark.realtime do
145
- 1000.times do
146
- NlpPure::Segmenting::DefaultWord.parse(english_simple_line_breaks)
147
- end
148
- end
149
- ).to be < 0.1
150
- end
151
- end
152
- end
153
- end
154
- end
155
-
156
- describe '.clean_input' do
157
- context 'English' do
158
- let(:english_leading_ellipsis_sentence) { ' … the quick brown fox jumps over the lazy dog.' }
159
-
160
- context '(with nil options)' do
161
- before do
162
- expect(NlpPure::Segmenting::DefaultWord).to receive(:options).at_least(:once).and_return(nil)
163
- end
164
-
165
- it 'raises NoMethodError' do
166
- expect { NlpPure::Segmenting::DefaultWord.clean_input(english_leading_ellipsis_sentence) }.to raise_error
167
- end
168
- end
169
-
170
- context '(with blank options)' do
171
- before do
172
- expect(NlpPure::Segmenting::DefaultWord).to receive(:options).at_least(:once).and_return({})
173
- end
174
-
175
- it 'only strips whitespace' do
176
- expect(NlpPure::Segmenting::DefaultWord.clean_input(english_leading_ellipsis_sentence)).to eq english_leading_ellipsis_sentence.strip
177
- end
178
- end
179
-
180
- context '(with default options)' do
181
- context 'with `nil` argument' do
182
- it 'does not raise error' do
183
- expect { NlpPure::Segmenting::DefaultWord.clean_input(nil) }.to_not raise_error
184
- end
185
-
186
- it 'returns empty String' do
187
- expect(NlpPure::Segmenting::DefaultWord.clean_input(nil)).to eq ''
188
- end
189
- end
190
-
191
- context 'without arguments' do
192
- it 'does not raise error' do
193
- expect { NlpPure::Segmenting::DefaultWord.clean_input }.to_not raise_error
194
- end
195
-
196
- it 'returns nil' do
197
- expect(NlpPure::Segmenting::DefaultWord.clean_input).to eq ''
198
- end
199
- end
200
-
201
- it 'modifies the input' do
202
- expect(NlpPure::Segmenting::DefaultWord.clean_input(english_leading_ellipsis_sentence)).to_not eq english_leading_ellipsis_sentence
203
- end
204
- end
205
- end
206
- end
207
- end
@@ -1,11 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
- require 'nlp_pure/segmenting'
4
-
5
- describe NlpPure::Segmenting do
6
- describe '[module]' do
7
- it 'is defined' do
8
- expect(defined?(NlpPure::Segmenting)).to be_truthy
9
- end
10
- end
11
- end
@@ -1,16 +0,0 @@
1
- # encoding: utf-8
2
- require 'rspec'
3
- require 'coveralls'
4
-
5
- Coveralls.wear! do
6
- add_filter '/vendor/'
7
- add_filter '/test/'
8
- add_filter '/tmp/'
9
- add_filter '/spec/'
10
- end
11
-
12
- RSpec.configure do |config|
13
- config.expect_with :rspec do |c|
14
- c.syntax = :expect
15
- end
16
- end