nlp-pure 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +31 -0
- data/.rspec +3 -0
- data/.rubocop.yml +105 -0
- data/.travis.yml +14 -0
- data/CHANGELOG.md +7 -0
- data/CONTRIBUTING.md +23 -0
- data/Gemfile +22 -0
- data/Guardfile +20 -0
- data/LICENSE +22 -0
- data/README.md +91 -0
- data/Rakefile +18 -0
- data/lib/nlp_pure.rb +19 -0
- data/lib/nlp_pure/logging.rb +37 -0
- data/lib/nlp_pure/segmenting.rb +7 -0
- data/lib/nlp_pure/segmenting/default_word.rb +26 -0
- data/lib/nlp_pure/version.rb +5 -0
- data/nlp-pure.gemspec +20 -0
- data/spec/lib/nlp_pure_spec.rb +11 -0
- data/spec/lib/segmenting/default_word_spec.rb +72 -0
- data/spec/lib/segmenting_spec.rb +11 -0
- data/spec/spec_helper.rb +11 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c4f6247383c48cd71d5ccebf6cc937023d1a880b
|
4
|
+
data.tar.gz: b33e1b19f2bfb5d49c6082f10699f06e2052e32c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7dee5b3c6947d08ef7e8b92a7332baeaaf8785969f98673613a7de2287dbba65dfb06a6f05990036013abaf01235f0017cb75cdf64be27770cc0c06046e30d99
|
7
|
+
data.tar.gz: a01ebec79d05301998d3618c1cd4e1b9cfa23982b36834cee17c529559b2bf36b5a32ea11d843469574924c42be245f192a636a3ee7f288490f7516e39f0a589
|
data/.gitignore
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
/vendor/
|
12
|
+
/.bundle/
|
13
|
+
/.sass-cache/
|
14
|
+
|
15
|
+
## Documentation cache and generated files:
|
16
|
+
/.yardoc/
|
17
|
+
/_yardoc/
|
18
|
+
/doc/
|
19
|
+
/rdoc/
|
20
|
+
|
21
|
+
Gemfile.lock
|
22
|
+
.rvmrc
|
23
|
+
.ruby-version
|
24
|
+
.rbx
|
25
|
+
|
26
|
+
## Environment normalisation:
|
27
|
+
/.bundle/
|
28
|
+
/lib/bundler/man/
|
29
|
+
|
30
|
+
*.swp
|
31
|
+
.DS_Store
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
AllCops:
|
2
|
+
Exclude:
|
3
|
+
- Guardfile
|
4
|
+
- 'vendor/**/*'
|
5
|
+
|
6
|
+
RunRailsCops: false
|
7
|
+
|
8
|
+
AlignParameters:
|
9
|
+
Enabled: false
|
10
|
+
ClassAndModuleChildren:
|
11
|
+
Enabled: false
|
12
|
+
Encoding:
|
13
|
+
Enabled: false
|
14
|
+
LineLength:
|
15
|
+
Max: 200
|
16
|
+
HashSyntax:
|
17
|
+
Exclude:
|
18
|
+
- Rakefile
|
19
|
+
- 'spec/**/*'
|
20
|
+
- 'test/**/*'
|
21
|
+
|
22
|
+
# Don't fail on whitespace between method names and arguments
|
23
|
+
Style/SingleSpaceBeforeFirstArg:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
# Indent private/protected/public as deep as method definitions
|
27
|
+
Style/AccessModifierIndentation:
|
28
|
+
EnforcedStyle: indent
|
29
|
+
SupportedStyles:
|
30
|
+
- outdent
|
31
|
+
- indent
|
32
|
+
|
33
|
+
Style/AlignHash:
|
34
|
+
|
35
|
+
# table - left alignment of keys, hash rockets, and values
|
36
|
+
EnforcedHashRocketStyle: table
|
37
|
+
# key - left alignment of keys
|
38
|
+
EnforcedColonStyle: key
|
39
|
+
|
40
|
+
EnforcedLastArgumentHashStyle: always_inspect
|
41
|
+
|
42
|
+
Style/AlignParameters:
|
43
|
+
EnforcedStyle: with_fixed_indentation
|
44
|
+
|
45
|
+
Style/AndOr:
|
46
|
+
# Whether `and` and `or` are banned only in conditionals (conditionals)
|
47
|
+
# or completely (always).
|
48
|
+
EnforcedStyle: always
|
49
|
+
|
50
|
+
# Checks if usage of %() or %Q() matches configuration.
|
51
|
+
Style/BarePercentLiterals:
|
52
|
+
EnforcedStyle: percent_q
|
53
|
+
|
54
|
+
Style/BracesAroundHashParameters:
|
55
|
+
EnforcedStyle: context_dependent
|
56
|
+
|
57
|
+
# Checks formatting of special comments
|
58
|
+
Style/CommentAnnotation:
|
59
|
+
Keywords:
|
60
|
+
- TODO
|
61
|
+
- FIXME
|
62
|
+
- OPTIMIZE
|
63
|
+
- HACK
|
64
|
+
- REVIEW
|
65
|
+
- PCI
|
66
|
+
- LEGAL
|
67
|
+
- NOTE
|
68
|
+
|
69
|
+
Style/IndentationWidth:
|
70
|
+
# Number of spaces for each indentation level.
|
71
|
+
Width: 2
|
72
|
+
|
73
|
+
Style/DotPosition:
|
74
|
+
EnforcedStyle: trailing
|
75
|
+
|
76
|
+
Style/EmptyLineBetweenDefs:
|
77
|
+
# If true, this parameter means that single line method definitions don't
|
78
|
+
# need an empty line between them.
|
79
|
+
AllowAdjacentOneLineDefs: false
|
80
|
+
|
81
|
+
Style/EmptyLinesAroundBlockBody:
|
82
|
+
EnforcedStyle: no_empty_lines
|
83
|
+
|
84
|
+
Style/EmptyLinesAroundClassBody:
|
85
|
+
EnforcedStyle: no_empty_lines
|
86
|
+
|
87
|
+
Style/EmptyLinesAroundModuleBody:
|
88
|
+
EnforcedStyle: no_empty_lines
|
89
|
+
|
90
|
+
# Checks whether the source file has a utf-8 encoding comment or not
|
91
|
+
Style/Encoding:
|
92
|
+
EnforcedStyle: when_needed
|
93
|
+
|
94
|
+
Style/HashSyntax:
|
95
|
+
EnforcedStyle: ruby19
|
96
|
+
|
97
|
+
Style/MethodName:
|
98
|
+
EnforcedStyle: snake_case
|
99
|
+
|
100
|
+
# Allow safe assignment in conditions.
|
101
|
+
Style/ParenthesesAroundCondition:
|
102
|
+
AllowSafeAssignment: true
|
103
|
+
|
104
|
+
Style/RaiseArgs:
|
105
|
+
EnforcedStyle: exploded
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Pull requests are welcome! Here’s a quick guide:
|
2
|
+
|
3
|
+
1. Fork the repo.
|
4
|
+
|
5
|
+
2. Run the tests. We only take pull requests with passing tests, and it's great
|
6
|
+
to know that you have a clean slate: `bundle && rake`
|
7
|
+
|
8
|
+
3. Add a test for your change. Only refactoring and documentation changes
|
9
|
+
require no new tests. If you are adding functionality or fixing a bug, we need
|
10
|
+
a test!
|
11
|
+
|
12
|
+
4. Make the test pass.
|
13
|
+
|
14
|
+
5. Push to your fork and submit a pull request.
|
15
|
+
|
16
|
+
Syntax:
|
17
|
+
|
18
|
+
* Two spaces, no tabs.
|
19
|
+
* No trailing whitespace. Blank lines should not have any space.
|
20
|
+
* Prefer &&/|| over and/or.
|
21
|
+
* MyClass.my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
|
22
|
+
* a = b and not a=b.
|
23
|
+
* Follow the conventions you see used in the source already.
|
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
gemspec
|
3
|
+
|
4
|
+
platforms :rbx do
|
5
|
+
gem 'rubysl', '~> 2.0' # if using anything in the ruby standard library
|
6
|
+
gem 'psych' # if using yaml
|
7
|
+
gem 'minitest' # if using minitest
|
8
|
+
gem 'rubinius-developer_tools' # if using any of coverage, debugger, profiler
|
9
|
+
end
|
10
|
+
|
11
|
+
platforms :jruby do
|
12
|
+
gem 'jruby-openssl'
|
13
|
+
gem 'activerecord-jdbcsqlite3-adapter'
|
14
|
+
end
|
15
|
+
|
16
|
+
group :test do
|
17
|
+
gem 'rake'
|
18
|
+
gem 'rspec', '~> 3.0.0'
|
19
|
+
gem 'guard-rspec'
|
20
|
+
gem 'guard-rubocop'
|
21
|
+
gem 'coveralls', require: false
|
22
|
+
end
|
data/Guardfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
guard :rspec, cmd: "bundle exec rspec", all_on_start: false, all_after_pass: false, failed_mode: :none do
|
2
|
+
require "guard/rspec/dsl"
|
3
|
+
dsl = Guard::RSpec::Dsl.new(self)
|
4
|
+
|
5
|
+
# RSpec files
|
6
|
+
rspec = dsl.rspec
|
7
|
+
watch(rspec.spec_helper) { rspec.spec_dir }
|
8
|
+
watch(rspec.spec_support) { rspec.spec_dir }
|
9
|
+
watch(rspec.spec_files)
|
10
|
+
|
11
|
+
# Ruby files
|
12
|
+
ruby = dsl.ruby
|
13
|
+
dsl.watch_spec_files_for(ruby.lib_files)
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
guard :rubocop, all_on_start: false, keep_failed: false do
|
18
|
+
watch(%r{.+\.rb$})
|
19
|
+
watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
|
20
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Reid Parham
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# NLP Pure
|
2
|
+
|
3
|
+
[Code Climate](https://codeclimate.com/github/parhamr/nlp-pure)
|
4
|
+
[Build Status](https://travis-ci.org/parhamr/nlp-pure)
|
5
|
+
[Coverage Status](https://coveralls.io/r/parhamr/nlp-pure)
|
6
|
+
|
7
|
+
Natural language processing algorithms implemented in pure Ruby with minimal dependencies.
|
8
|
+
|
9
|
+
NOTE: this is not affiliated with, endorsed by, or in any way connected with [Pure NLP](http://purenlp.com/), a trademark of John La Valle.
|
10
|
+
|
11
|
+
This project aims to provide functionality similar to [Treat](https://github.com/louismullie/treat), [open-nlp](https://github.com/louismullie/open-nlp), and [stanford-core-nlp](https://rubygems.org/gems/stanford-core-nlp) but with fewer dependencies. The code is tested against English-language text, but the algorithm implementations aim to be flexible enough to support other languages.
|
12
|
+
|
13
|
+
|
14
|
+
## Requirements
|
15
|
+
|
16
|
+
TODO
|
17
|
+
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
Add this line to your application’s Gemfile:
|
22
|
+
|
23
|
+
```
|
24
|
+
gem 'nlp-pure'
|
25
|
+
```
|
26
|
+
|
27
|
+
And then execute:
|
28
|
+
|
29
|
+
```
|
30
|
+
$ bundle
|
31
|
+
```
|
32
|
+
|
33
|
+
Or install it yourself as:
|
34
|
+
|
35
|
+
```
|
36
|
+
$ gem install nlp-pure
|
37
|
+
```
|
38
|
+
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
|
43
|
+
### Word Segmentation
|
44
|
+
|
45
|
+
```
|
46
|
+
$ bundle exec irb
|
47
|
+
irb(main):001:0> require_relative './lib/nlp_pure/segmenting/default_word'
|
48
|
+
=> true
|
49
|
+
irb(main):002:0> NlpPure::Segmenting::DefaultWord.parse 'The quick brown fox jumps over the lazy dog.'
|
50
|
+
=> ["The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog."]
|
51
|
+
irb(main):003:0> NlpPure::Segmenting::DefaultWord.parse 'The New York-based company hired new staff.'
|
52
|
+
=> ["The", "New", "York", "based", "company", "hired", "new", "staff."]
|
53
|
+
irb(main):004:0> NlpPure::Segmenting::DefaultWord.parse 'The U.S.A. is a member of NATO.'
|
54
|
+
=> ["The", "U.S.A.", "is", "a", "member", "of", "NATO."]
|
55
|
+
irb(main):005:0> NlpPure::Segmenting::DefaultWord.parse "Mary had a little lamb,\nHis fleece was white as snow,\nAnd everywhere that Mary went,\nThe lamb was sure to go."
|
56
|
+
=> ["Mary", "had", "a", "little", "lamb,", "His", "fleece", "was", "white", "as", "snow,", "And", "everywhere", "that", "Mary", "went,", "The", "lamb", "was", "sure", "to", "go."]
|
57
|
+
```
|
58
|
+
|
59
|
+
|
60
|
+
## Supported Ruby Versions
|
61
|
+
|
62
|
+
This library aims to support and is [tested against](https://travis-ci.org/parhamr/nlp-pure) the following Ruby
|
63
|
+
implementations:
|
64
|
+
|
65
|
+
* Ruby 2.0
|
66
|
+
* Ruby 2.1
|
67
|
+
* Ruby 2.2
|
68
|
+
* [JRuby](http://www.jruby.org/)
|
69
|
+
* [Rubinius](http://rubini.us/)
|
70
|
+
|
71
|
+
If something doesn't work on one of these interpreters, it's a bug.
|
72
|
+
|
73
|
+
This library may inadvertently work (or seem to work) on other Ruby
|
74
|
+
implementations; however, support will only be provided for the versions listed
|
75
|
+
above.
|
76
|
+
|
77
|
+
|
78
|
+
## Versioning
|
79
|
+
|
80
|
+
This library aims to adhere to [Semantic Versioning 2.0.0](http://semver.org/). Violations
|
81
|
+
of this scheme should be reported as bugs. Specifically, if a minor or patch
|
82
|
+
version is released that breaks backward compatibility, that version should be
|
83
|
+
immediately yanked and/or a new version should be immediately released that
|
84
|
+
restores compatibility. Breaking changes to the public API will only be
|
85
|
+
introduced with new major versions. As a result of this policy, you can (and
|
86
|
+
should) specify a dependency on this gem using the [Pessimistic Version
|
87
|
+
Constraint](http://docs.rubygems.org/read/chapter/16#page74) with two digits of precision. For example:
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
spec.add_dependency 'nlp-pure', '~> 0.1'
|
91
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
require 'rubocop/rake_task'
|
7
|
+
RSpec::Core::RakeTask.new(:spec)
|
8
|
+
|
9
|
+
task :rubocop do
|
10
|
+
require 'rubocop'
|
11
|
+
cli = RuboCop::CLI.new
|
12
|
+
cli.run
|
13
|
+
end
|
14
|
+
|
15
|
+
task :default => [:spec, :rubocop]
|
16
|
+
rescue LoadError => e
|
17
|
+
STDERR << "#{e.class}: #{e.message} (#{e.backtrace[0]})"
|
18
|
+
end
|
data/lib/nlp_pure.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'nlp_pure/version'
|
3
|
+
fail "NLP Pure #{NlpPure::VERSION} does not support Ruby 1.9." if RUBY_PLATFORM != 'java' && RUBY_VERSION < '2.0.0'
|
4
|
+
|
5
|
+
#
|
6
|
+
module NlpPure
|
7
|
+
NAME = 'NlpPure'
|
8
|
+
LICENSE = 'See LICENSE for details.'
|
9
|
+
|
10
|
+
DEFAULTS = {}
|
11
|
+
|
12
|
+
def self.logger
|
13
|
+
NlpPure::Logging.logger
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.logger=(log)
|
17
|
+
NlpPure::Logging.logger = log
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'time'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
#
|
6
|
+
module NlpPure
|
7
|
+
#
|
8
|
+
module Logging
|
9
|
+
#
|
10
|
+
class Pretty < Logger::Formatter
|
11
|
+
def call(severity, time, program_name, message)
|
12
|
+
"#{time.utc.iso8601(2)} #{::Process.pid} #{severity}: #{message}\n"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.initialize_logger(log_target = STDOUT)
|
17
|
+
oldlogger = defined?(@logger) ? @logger : nil
|
18
|
+
@logger = Logger.new(log_target)
|
19
|
+
@logger.level = Logger::INFO
|
20
|
+
@logger.formatter = Pretty.new
|
21
|
+
oldlogger.close if oldlogger
|
22
|
+
@logger
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.logger
|
26
|
+
defined?(@logger) ? @logger : initialize_logger
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.logger=(log)
|
30
|
+
@logger = (log ? log : Logger.new('/dev/null'))
|
31
|
+
end
|
32
|
+
|
33
|
+
def logger
|
34
|
+
NlpPure::Logging.logger
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module NlpPure
|
4
|
+
module Segmenting
|
5
|
+
#
|
6
|
+
module DefaultWord
|
7
|
+
DEFAULT_OPTIONS = {
|
8
|
+
# hyphen, en dash, em dash, and whitespace
|
9
|
+
split: /[\-–—\s]/
|
10
|
+
}.freeze
|
11
|
+
def self.parse(*args)
|
12
|
+
unless args.nil? || args.empty?
|
13
|
+
input = args[0].to_s
|
14
|
+
input.split(options[:split])
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# NOTE: exposed as a method for easy mock/stub
|
19
|
+
def self.options
|
20
|
+
DEFAULT_OPTIONS
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require_relative '../segmenting'
|
data/nlp-pure.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path('../lib/nlp_pure/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.authors = ['Reid Parham']
|
6
|
+
gem.email = ['reid.parham@gmail.com']
|
7
|
+
gem.description = gem.summary = 'Natural language processing algorithms implemented in pure Ruby with minimal dependencies'
|
8
|
+
gem.homepage = 'https://github.com/parhamr/nlp-pure'
|
9
|
+
gem.license = 'MIT'
|
10
|
+
|
11
|
+
gem.executables = []
|
12
|
+
gem.files = `git ls-files`.split("\n")
|
13
|
+
gem.test_files = `git ls-files -- spec/*`.split("\n")
|
14
|
+
gem.name = 'nlp-pure'
|
15
|
+
gem.require_paths = ['lib']
|
16
|
+
gem.version = NlpPure::VERSION
|
17
|
+
gem.add_development_dependency 'rake', '~> 10.4'
|
18
|
+
gem.add_development_dependency 'rspec', '~> 3.0'
|
19
|
+
gem.add_development_dependency 'coveralls', '~> 0.7'
|
20
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'nlp_pure/segmenting/default_word'
|
4
|
+
|
5
|
+
describe NlpPure::Segmenting::DefaultWord do
|
6
|
+
describe '[module]' do
|
7
|
+
it 'is defined' do
|
8
|
+
expect(defined?(NlpPure::Segmenting::DefaultWord)).to be_truthy
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '.parse' do
|
13
|
+
context 'English' do
|
14
|
+
let(:english_simple_sentence) { 'The quick brown fox jumps over the lazy dog.' }
|
15
|
+
let(:english_hyphen_sentence) { 'The New York-based company hired new staff.' }
|
16
|
+
let(:english_dash_sentence) { 'The quick brown fox—full of energy—jumps over the lazy dog.' }
|
17
|
+
let(:english_abbreviation_sentence) { 'The U.S.A. is a member of NATO.' }
|
18
|
+
let(:english_simple_paragraph) { 'Mary had a little lamb. The lamb’s fleece was white as snow. Everywhere that Mary went, the lamb was sure to go.' }
|
19
|
+
let(:english_simple_line_breaks) { "Mary had a little lamb,\nHis fleece was white as snow,\nAnd everywhere that Mary went,\nThe lamb was sure to go." }
|
20
|
+
|
21
|
+
context '(with default options)' do
|
22
|
+
context 'with `nil` argument' do
|
23
|
+
it 'does not raise error' do
|
24
|
+
expect { NlpPure::Segmenting::DefaultWord.parse(nil) }.to_not raise_error
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns Array' do
|
28
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(nil)).to be_an Array
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context 'without arguments' do
|
33
|
+
it 'does not raise error' do
|
34
|
+
expect { NlpPure::Segmenting::DefaultWord.parse }.to_not raise_error
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns nil' do
|
38
|
+
expect(NlpPure::Segmenting::DefaultWord.parse).to eq nil
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'returns Array' do
|
43
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence)).to be_an Array
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'correctly counts words' do
|
47
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_sentence).length).to eq(9)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'correctly segments hyphens' do
|
51
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_hyphen_sentence).length).to eq(8)
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'correctly segments dashes' do
|
55
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_dash_sentence).length).to eq(12)
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'does not segment abbreviations' do
|
59
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_abbreviation_sentence).length).to eq(7)
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'correctly counts with longer texts' do
|
63
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_paragraph).length).to eq(22)
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'correctly counts with line breaks' do
|
67
|
+
expect(NlpPure::Segmenting::DefaultWord.parse(english_simple_line_breaks).length).to eq(22)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nlp-pure
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Reid Parham
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '10.4'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '10.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: coveralls
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.7'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.7'
|
55
|
+
description: Natural language processing algorithms implemented in pure Ruby with
|
56
|
+
minimal dependencies
|
57
|
+
email:
|
58
|
+
- reid.parham@gmail.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".rspec"
|
65
|
+
- ".rubocop.yml"
|
66
|
+
- ".travis.yml"
|
67
|
+
- CHANGELOG.md
|
68
|
+
- CONTRIBUTING.md
|
69
|
+
- Gemfile
|
70
|
+
- Guardfile
|
71
|
+
- LICENSE
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- lib/nlp_pure.rb
|
75
|
+
- lib/nlp_pure/logging.rb
|
76
|
+
- lib/nlp_pure/segmenting.rb
|
77
|
+
- lib/nlp_pure/segmenting/default_word.rb
|
78
|
+
- lib/nlp_pure/version.rb
|
79
|
+
- nlp-pure.gemspec
|
80
|
+
- spec/lib/nlp_pure_spec.rb
|
81
|
+
- spec/lib/segmenting/default_word_spec.rb
|
82
|
+
- spec/lib/segmenting_spec.rb
|
83
|
+
- spec/spec_helper.rb
|
84
|
+
homepage: https://github.com/parhamr/nlp-pure
|
85
|
+
licenses:
|
86
|
+
- MIT
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.2.2
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: Natural language processing algorithms implemented in pure Ruby with minimal
|
108
|
+
dependencies
|
109
|
+
test_files:
|
110
|
+
- spec/lib/nlp_pure_spec.rb
|
111
|
+
- spec/lib/segmenting/default_word_spec.rb
|
112
|
+
- spec/lib/segmenting_spec.rb
|
113
|
+
- spec/spec_helper.rb
|
114
|
+
has_rdoc:
|