ngrams-enabler 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjEyNjc3YTM2MTA4YmVjMjQ3OGNiMzM1NmRjNzI5YWFmYTE0YWVlNQ==
5
+ data.tar.gz: !binary |-
6
+ ODYwYTMyNTNkZWJjNDI5NTQzMDZkYmYzNTM2ZDdlYWE0ZWFlNzhjMg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ OTkzNmYxYTYxZjUzNmQ3ODViNzEzMzRlN2NhZDIyMzQ4NGNjY2VlZmZhYTQw
10
+ YjQ1NzcyZWNiMTBhZTMyMTZjNmY4ZWYxMWUzZTVhZWZjOWU1OGM2YWYwYmMz
11
+ ZWJjYmMwMWE5MTcwODI1NjFkMDA1YmE5OWNlY2VjYjJiMDMwMGI=
12
+ data.tar.gz: !binary |-
13
+ YjBmODI1YjkxYjk2ZTE2ZTNkZWJjZmUyMzM4NWMwYjE2NDFjMDU2ODVlYThh
14
+ YzQ4MjcyMGExZDVjMzNhNTc5NjYyNTkxNjhhZDJlMzMyZWFjYzg2OTdhYjdk
15
+ NGY3ODkyZGJlMzAzM2RiNTY2OTc2MzU5YjFjYzg3Yjg3OTQ4MzY=
@@ -0,0 +1,6 @@
1
+ .DS_Store
2
+ *.gem
3
+ .bundle
4
+ Gemfile.lock
5
+ pkg/*
6
+ .rvmrc
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - jruby-19mode
4
+ - rbx-19mode
5
+ - 1.9.2
6
+ - 1.9.3
7
+ - 2.0.0
8
+ - ruby-head
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec.
gemspec

# Tooling needed to run the Rakefile tasks and the spec suite.
group :development, :test do
  gem 'rake'
  gem 'rspec'
end
@@ -0,0 +1,41 @@
1
+ ngrams-enabler [![Build Status](https://travis-ci.org/lloydmeta/ngrams-enabler.png?branch=master)](https://travis-ci.org/lloydmeta/ngrams-enabler) [![Code Climate](https://codeclimate.com/github/lloydmeta/ngrams-enabler.png)](https://codeclimate.com/github/lloydmeta/ngrams-enabler)
2
+ -------------
3
+
4
+ A simple way of getting ngrams out of any given String object. Supports CJK (Chinese, Japanese, Korean) as well as alphabet-based languages.
5
+
6
+ Example Usage
7
+ =========
8
+
9
+ ```ruby
10
+ require 'ngrams_enabler'
11
+
12
+ "This is just a test".ngrams     # => ["This", "is", "just", "a", "test"]
13
+
14
+ "This is just a test".ngrams(2)  # => ["This is", "is just", "just a", "a test"]
15
+
16
+ "こんにちは".ngrams     # => ["こ", "ん", "に", "ち", "は"]
17
+
18
+ "こんにちは".ngrams(2)  # => ["こん", "んに", "にち", "ちは"]
19
+ ```
20
+
21
+ ## License
22
+
23
+ Copyright (c) 2013 by Lloyd Chan
24
+
25
+ Permission is hereby granted, free of charge, to any person obtaining a
26
+ copy of this software and associated documentation files (the
27
+ "Software"), to deal in the Software without restriction, including
28
+ without limitation the rights to use, copy, modify, merge, publish,
29
+ distribute, and to permit persons to whom the Software is furnished to do so, subject to
30
+ the following conditions:
31
+
32
+ The above copyright notice and this permission notice shall be included
33
+ in all copies or substantial portions of the Software.
34
+
35
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
36
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
38
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
39
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
40
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
41
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
# Register the RSpec rake task under its default name, `spec`,
# using RSpec's default configuration.
RSpec::Core::RakeTask.new(:spec)

# Running a bare `rake` executes the spec suite.
task :default => :spec
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+
3
class String

  # Splits the receiver into n-grams.
  #
  # A string containing at least one CJK character (Han, Katakana, Hiragana
  # or Hangul) is tokenized per character and its grams are joined without a
  # separator. Any other string is tokenized with String#split on
  # +split_char+ and its grams are re-joined with +split_char+.
  #
  # The result is always an Array, so callers can iterate it regardless of
  # how short the receiver is.
  #
  # @param n [Integer] number of tokens per gram (defaults to 1)
  # @param options [Hash] optional settings
  # @option options [String] :split_char (' ') token delimiter used for
  #   non-CJK strings
  # @return [Array<String>] the n-grams in order of appearance; an empty
  #   array when the string yields no tokens, and a one-element array holding
  #   the whole string when it has no more than +n+ tokens
  # @raise [ArgumentError] from Enumerable#each_cons when +n+ is not positive
  #   and the string has at least one token
  def ngrams(n = 1, options = {})
    split_char = options.fetch(:split_char, ' ')
    cjk = contains_cjk?
    tokens = cjk ? split(//u) : split(split_char)

    # Nothing to build grams from (empty or delimiter-only string).
    return [] if tokens.empty?

    # Too few tokens to slide a window of size n: the whole string is the
    # only gram. Wrapped in an Array to keep the return type uniform.
    return [self] if tokens.size <= n

    separator = cjk ? '' : split_char
    tokens.each_cons(n).map { |gram| gram.join(separator) }
  end

  private

  # Reports whether the receiver contains at least one Han, Katakana,
  # Hiragana or Hangul character.
  #
  # @return [Boolean]
  def contains_cjk?
    !!(self =~ /\p{Han}|\p{Katakana}|\p{Hiragana}|\p{Hangul}/)
  end

end
@@ -0,0 +1,3 @@
1
+ # encoding: utf-8
2
+
3
+ require 'ngrams/core_extensions/string'
@@ -0,0 +1,18 @@
1
# Gem specification for ngrams-enabler: identity, packaged files and
# development dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name     = 'ngrams-enabler'
  spec.version  = '0.0.1'
  spec.date     = '2013-05-16'
  spec.authors  = ['Lloyd Meta']
  spec.email    = ['lloydmeta@gmail.com']
  spec.homepage = 'http://github.com/lloydmeta/ngrams-enabler'

  # The summary deliberately reuses the full description text.
  spec.description = 'A simple way of getting ngrams out of any given String object. Supports CJK (Chinese, Japanese, Korean) as well as alphabet based languages.'
  spec.summary     = spec.description

  # Package contents are derived from the files tracked by git.
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ['lib']

  # Needed only to run the Rakefile tasks and the spec suite.
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec'
end
@@ -0,0 +1,39 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
# Specs for the String#ngrams core extension, covering both the
# delimiter-based (non-CJK) and per-character (CJK) tokenization paths.
describe String do

  # Fixtures: one Japanese sentence and one space-delimited English sentence.
  let(:japanese_text) { "これは日本語" }
  let(:english_text) { "this string is in english" }

  describe "#ngrams" do

    context "not CJK" do

      # Default n is 1, so the grams are simply the space-separated words.
      it "should return the proper default ngrams" do
        english_text.ngrams.should eq(english_text.split(" "))
      end

      it "should allow me to set higher n" do
        expected = ['this string', 'string is', 'is in', 'in english']
        english_text.ngrams(2).should eq(expected)
      end

    end

    context "CJK string" do

      # Default n is 1, so every character is its own gram.
      it "should return the proper default ngrams" do
        japanese_text.ngrams.should eq(%w[こ れ は 日 本 語])
      end

      it "should allow me to set higher n" do
        expected = %w[これ れは は日 日本 本語]
        japanese_text.ngrams(2).should eq(expected)
      end

    end

  end

end
@@ -0,0 +1,12 @@
1
+ require 'bundler/setup'
2
+ require 'rspec'
3
+ require 'rspec/mocks'
4
+ require 'ngrams_enabler'
5
+
6
# Load shared spec support code from spec/support. The glob is limited to
# *.rb so that directories and non-Ruby files are never handed to `require`
# (which cannot load them), and the list is sorted so the load order is the
# same on every platform.
Dir[File.expand_path('../support/**/*.rb', __FILE__)].sort.each { |f| require f }

# Global RSpec configuration hook.
RSpec.configure do |config|

  #nothing yet

end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngrams-enabler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Lloyd Meta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: A simple way of getting ngrams out of any given String object. Supports
42
+ CJK (Chinese, Japanese, Korean) as well as alphabet based languages.
43
+ email:
44
+ - lloydmeta@gmail.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .rspec
51
+ - .travis.yml
52
+ - Gemfile
53
+ - README.md
54
+ - Rakefile
55
+ - lib/ngrams/core_extensions/string.rb
56
+ - lib/ngrams_enabler.rb
57
+ - ngrams-enabler.gemspec
58
+ - spec/ngrams_enabler/ngrams_enabler_spec.rb
59
+ - spec/spec_helper.rb
60
+ homepage: http://github.com/lloydmeta/ngrams-enabler
61
+ licenses: []
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.0.3
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: A simple way of getting ngrams out of any given String object. Supports CJK
83
+ (Chinese, Japanese, Korean) as well as alphabet based languages.
84
+ test_files:
85
+ - spec/ngrams_enabler/ngrams_enabler_spec.rb
86
+ - spec/spec_helper.rb