shingling 0.0.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in shingling.gemspec
4
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/shingling.rb ADDED
@@ -0,0 +1,50 @@
1
+ require "shingling/version"
2
+
3
+ module Resemblance
4
+ class Shingling
5
+ def initialize(content = '', options = {})
6
+ @content = content
7
+ @shingle_length = options[:shingle_length] || 5
8
+ @stop_words = options[:stop_words] || []
9
+ @downcase = options[:downcase] || false
10
+ @replace_chars = options[:replace_chars] || {}
11
+ end
12
+
13
+ def each_shingles
14
+ word = ""
15
+ char_flag = false
16
+ shingling = []
17
+ position_end_words = []
18
+ position_start, position_end = 0, 0
19
+
20
+ @content.each_char do |char|
21
+ char = @replace_chars[char] if @replace_chars.key? char
22
+ if char !~ /[[:word:]]+/
23
+ char_flag = true
24
+ else
25
+ if char_flag
26
+ if !stop_word?(word) || word !~ /\S/
27
+ shingling << (@downcase ? Unicode::downcase(word) : word)
28
+ word = ""
29
+ position_end_words << position_end
30
+ if shingling.size == @shingle_length
31
+ yield(shingling, position_start, position_end - 1)
32
+ position_start = position_end_words.shift
33
+ shingling.shift
34
+ end
35
+ end
36
+ char_flag = false
37
+ end
38
+ word << char
39
+ end
40
+ position_end += 1
41
+ end
42
+ end
43
+
44
+ private
45
+
46
+ def stop_word? word
47
+ @stop_words.include? word
48
+ end
49
+ end
50
+ end
data/lib/shingling/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Shingling
2
+ VERSION = "0.0.3"
3
+ end
data/shingling.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "shingling/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "shingling"
7
+ s.version = Shingling::VERSION
8
+ s.authors = ["vad"]
9
+ s.email = ["vad4msiu@gmail.com"]
10
+ s.homepage = ""
11
+ s.summary = "Shingling"
12
+ s.description = "Shingling"
13
+
14
+ s.rubyforge_project = "shingling"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ s.add_dependency "unicode"
22
+ s.add_development_dependency "rspec"
23
+ end
data/spec/shingling_spec.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "shingling"
2
+
3
+ describe Shingling do
4
+ it "Create test"
5
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shingling
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - vad
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: unicode
16
+ requirement: &2152592500 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2152592500
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2152591880 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2152591880
36
+ description: Shingling
37
+ email:
38
+ - vad4msiu@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - .gitignore
44
+ - Gemfile
45
+ - Rakefile
46
+ - lib/shingling.rb
47
+ - lib/shingling/version.rb
48
+ - shingling.gemspec
49
+ - spec/shingling_spec.rb
50
+ homepage: ''
51
+ licenses: []
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project: shingling
70
+ rubygems_version: 1.8.10
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Shingling
74
+ test_files:
75
+ - spec/shingling_spec.rb
76
+ has_rdoc: