ngrams-enabler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjEyNjc3YTM2MTA4YmVjMjQ3OGNiMzM1NmRjNzI5YWFmYTE0YWVlNQ==
5
+ data.tar.gz: !binary |-
6
+ ODYwYTMyNTNkZWJjNDI5NTQzMDZkYmYzNTM2ZDdlYWE0ZWFlNzhjMg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ OTkzNmYxYTYxZjUzNmQ3ODViNzEzMzRlN2NhZDIyMzQ4NGNjY2VlZmZhYTQw
10
+ YjQ1NzcyZWNiMTBhZTMyMTZjNmY4ZWYxMWUzZTVhZWZjOWU1OGM2YWYwYmMz
11
+ ZWJjYmMwMWE5MTcwODI1NjFkMDA1YmE5OWNlY2VjYjJiMDMwMGI=
12
+ data.tar.gz: !binary |-
13
+ YjBmODI1YjkxYjk2ZTE2ZTNkZWJjZmUyMzM4NWMwYjE2NDFjMDU2ODVlYThh
14
+ YzQ4MjcyMGExZDVjMzNhNTc5NjYyNTkxNjhhZDJlMzMyZWFjYzg2OTdhYjdk
15
+ NGY3ODkyZGJlMzAzM2RiNTY2OTc2MzU5YjFjYzg3Yjg3OTQ4MzY=
@@ -0,0 +1,6 @@
1
+ .DS_Store
2
+ *.gem
3
+ .bundle
4
+ Gemfile.lock
5
+ pkg/*
6
+ .rvmrc
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - jruby-19mode
4
+ - rbx-19mode
5
+ - 1.9.2
6
+ - 1.9.3
7
+ - 2.0.0
8
+ - ruby-head
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Pull in the dependencies declared in the gemspec.
gemspec

# Tooling needed to run the build and the specs.
group :development, :test do
  gem 'rake'
  gem 'rspec'
end
@@ -0,0 +1,41 @@
1
+ ngrams-enabler [![Build Status](https://travis-ci.org/lloydmeta/ngrams-enabler.png?branch=master)](https://travis-ci.org/lloydmeta/ngrams-enabler) [![Code Climate](https://codeclimate.com/github/lloydmeta/ngrams-enabler.png)](https://codeclimate.com/github/lloydmeta/ngrams-enabler)
2
+ -------------
3
+
4
+ A simple way of getting ngrams out of any given String object. Supports CJK (Chinese, Japanese, Korean) as well as alphabet based languages.
5
+
6
+ Example Usage
7
+ =========
8
+
9
+ ```ruby
10
+ require 'ngrams_enabler'
11
+
12
+ "This is just a test".ngrams
13
+
14
+ "This is just a test".ngrams(2)
15
+
16
+ "こんにちは".ngrams
17
+
18
+ "こんにちは".ngrams(2)
19
+ ```
20
+
21
+ ## License
22
+
23
+ Copyright (c) 2013 by Lloyd Chan
24
+
25
+ Permission is hereby granted, free of charge, to any person obtaining a
26
+ copy of this software and associated documentation files (the
27
+ "Software"), to deal in the Software without restriction, including
28
+ without limitation the rights to use, copy, modify, merge, publish,
29
+ distribute, and to permit persons to whom the Software is furnished to do so, subject to
30
+ the following conditions:
31
+
32
+ The above copyright notice and this permission notice shall be included
33
+ in all copies or substantial portions of the Software.
34
+
35
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
36
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
38
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
39
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
40
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
41
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
# Register the RSpec rake task under its default name with default settings.
RSpec::Core::RakeTask.new

# Running `rake` with no arguments runs the specs.
task(:default => [:spec])
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+
3
class String

  # Splits the string into n-grams.
  #
  # If the string contains at least one Han, Katakana, Hiragana or Hangul
  # character, every character is treated as a token and the tokens of an
  # n-gram are concatenated directly. Otherwise the string is split on
  # +options[:split_char]+ and the tokens of each n-gram are joined back
  # together with that same delimiter.
  #
  # @param n [Integer] number of tokens per n-gram (default 1); expected to
  #   be positive
  # @param options [Hash] +:split_char+ is the delimiter used for non-CJK
  #   text (default: a single space)
  # @return [Array<String>] the n-grams in order of appearance. When the
  #   string has +n+ tokens or fewer, a one-element array holding the
  #   receiver itself.
  def ngrams(n = 1, options = {})
    split_char = options.fetch(:split_char, ' ')
    cjk = contains_cjk?
    tokens = cjk ? each_char.to_a : split(split_char)

    # Too short to slide a window over: the whole string is the only n-gram.
    # It is wrapped in an Array so the return type matches the general case.
    return [self] if tokens.size <= n

    # CJK tokens are single characters and are glued back without a delimiter.
    joiner = cjk ? '' : split_char
    tokens.each_cons(n).map { |gram| gram.join(joiner) }
  end

  private

  # @return [Boolean] true when the string contains at least one Han,
  #   Katakana, Hiragana or Hangul character
  def contains_cjk?
    !!(self =~ /\p{Han}|\p{Katakana}|\p{Hiragana}|\p{Hangul}/)
  end

end
@@ -0,0 +1,3 @@
1
+ # encoding: utf-8
2
+
3
+ require 'ngrams/core_extensions/string'
@@ -0,0 +1,18 @@
1
# Gem specification for ngrams-enabler.
Gem::Specification.new do |gem|
  gem.name        = %q{ngrams-enabler}
  gem.version     = "0.0.1"
  gem.date        = %q{2013-05-16}
  gem.authors     = ["Lloyd Meta"]
  gem.email       = ["lloydmeta@gmail.com"]
  gem.homepage    = "https://github.com/lloydmeta/ngrams-enabler"
  gem.description = %q{A simple way of getting ngrams out of any given String object. Supports CJK (Chinese, Japanese, Korean) as well as alphabet based languages.}
  # Kept distinct from the description: RubyGems warns at build time when
  # summary and description are identical.
  gem.summary     = %q{Adds String#ngrams, with support for CJK (Chinese, Japanese, Korean) text.}

  # File lists are read from git, so the gem has to be built from a git checkout.
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'
end
@@ -0,0 +1,39 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
# Specs for String#ngrams, covering delimiter-separated text and CJK text.
# Written with the `expect` syntax; the older `should` syntax is deprecated
# in current RSpec releases.
describe String do

  let(:cjk_string){"これは日本語"}
  let(:english_string){"this string is in english"}

  describe "#ngrams" do

    context "not CJK" do

      it "should return the proper default ngrams" do
        ngrams = english_string.ngrams
        expect(ngrams).to eq(english_string.split(" "))
      end

      it "should allow me to set higher n" do
        expect(english_string.ngrams(2)).to eq(['this string', 'string is', 'is in', 'in english'])
      end

    end

    context "CJK string" do

      it "should return the proper default ngrams" do
        ngrams = cjk_string.ngrams
        expect(ngrams).to eq(%w[こ れ は 日 本 語])
      end

      it "should allow me to set higher n" do
        expect(cjk_string.ngrams(2)).to eq(%w[これ れは は日 日本 本語])
      end

    end

  end

end
@@ -0,0 +1,12 @@
1
+ require 'bundler/setup'
2
+ require 'rspec'
3
+ require 'rspec/mocks'
4
+ require 'ngrams_enabler'
5
+
6
# Load shared spec support code. Only Ruby files are matched, so
# sub-directories and non-Ruby files under spec/support are never handed to
# `require`. The list is sorted so the load order does not depend on the
# filesystem.
Dir[File.expand_path('../support/**/*.rb', __FILE__)].sort.each { |f| require f }

RSpec.configure do |config|

  #nothing yet

end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngrams-enabler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Lloyd Meta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: A simple way of getting ngrams out of any given String object. Supports
42
+ CJK (Chinese, Japanese, Korean) as well as alphabet based languages.
43
+ email:
44
+ - lloydmeta@gmail.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .rspec
51
+ - .travis.yml
52
+ - Gemfile
53
+ - README.md
54
+ - Rakefile
55
+ - lib/ngrams/core_extensions/string.rb
56
+ - lib/ngrams_enabler.rb
57
+ - ngrams-enabler.gemspec
58
+ - spec/ngrams_enabler/ngrams_enabler_spec.rb
59
+ - spec/spec_helper.rb
60
+ homepage: http://github.com/lloydmeta/ngrams-enabler
61
+ licenses: []
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.0.3
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: A simple way of getting ngrams out of any given String object. Supports CJK
83
+ (Chinese, Japanese, Korean) as well as alphabet based languages.
84
+ test_files:
85
+ - spec/ngrams_enabler/ngrams_enabler_spec.rb
86
+ - spec/spec_helper.rb