hashing_trick_ml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3c6d17bc5e27321ad4ee61002836733beec9a5aa
4
+ data.tar.gz: cd802c82957b35aa0b160f1a66bd8c65ed3f185f
5
+ SHA512:
6
+ metadata.gz: cc3fd7902c188471abfd01fab7f306cee38089d8cf10f1aa2ccc25961c9a6baadff9bf9a81c4f0ee979aba76837ed643ef375ab20ce97f2096911ca553562cdd
7
+ data.tar.gz: bee07558698fc203aefcb29ce7bab0afc888e3800932dfafb2c519d1fbfb09da75a4f0403495f643cf9039ecf64e91a5897f73f99001f821fb6288d65ae6a558
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2017 Mohammed Gharbi
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # HashingTrickMl
2
+ In machine learning, the hashing trick is a fast and space-efficient way of vectorizing features.
3
+
4
+ ## Usage
5
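+ Include `HashingTrickMl::ActsAsVectorized` in a class, call `acts_as_vectorized` (optionally with `default_dimensions:`), and use instance helpers such as `build_word_vector(text)` to turn features into fixed-size numeric vectors.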
6
+
7
+ ## Installation
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'hashing_trick_ml'
12
+ ```
13
+
14
+ And then execute:
15
+ ```bash
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+ ```bash
21
+ $ gem install hashing_trick_ml
22
+ ```
23
+
24
+ ## Contributing
25
+ Contribution directions go here.
26
+
27
+ ## License
28
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,33 @@
# Try to activate the Bundler-managed gem environment; when Bundler itself is
# not installed, print a hint and carry on.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc`: generate API documentation for the README and everything
# under lib/ into the rdoc/ directory.
RDoc::Task.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = 'HashingTrickMl'
  doc.options.push('--line-numbers')
  doc.rdoc_files.include('README.md', 'lib/**/*.rb')
end

require 'bundler/gem_tasks'

require 'rake/testtask'

# `rake test`: run every test/**/*_test.rb file with test/ on the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = false
end

# Running plain `rake` runs the test suite.
task default: :test
@@ -0,0 +1,57 @@
require 'digest'

module HashingTrickMl
  # Mixin with helpers that turn raw feature values into numeric vectors.
  #
  # The module depends on ActiveSupport (Concern, class_attribute, blank?,
  # presence) and on ActionController's +strip_tags+ view helper, so it has to
  # be loaded in an environment that provides both.
  #
  # @example
  #   class Venue
  #     include HashingTrickMl::ActsAsVectorized
  #     acts_as_vectorized default_dimensions: 500
  #   end
  module ActsAsVectorized
    extend ActiveSupport::Concern

    # Class-level macro made available to every class that includes the mixin.
    module ClassMethods
      # Declares the class as vectorized and stores its default vector size.
      #
      # class_attribute is used instead of cattr_accessor so that a subclass
      # calling this macro does not overwrite the value of its parent class.
      #
      # @param options [Hash] supports +:default_dimensions+ (defaults to 1_000)
      # @return [Integer] the stored default number of dimensions
      def acts_as_vectorized(options = {})
        class_attribute :default_dimensions
        self.default_dimensions = options[:default_dimensions] || 1_000
      end
    end

    # Builds a bag-of-words count vector using the hashing trick.
    #
    # The text is stripped of HTML tags, runs of non-alphanumeric characters
    # are replaced by a single space, and the result is downcased before being
    # split on +separator+. Every word increments the bucket selected by its
    # SHA-2 digest.
    #
    # @param data [String, nil] raw text; +nil+ is treated as an empty string
    # @param dimensions [Integer] length of the resulting vector
    # @param separator [String] token separator applied after normalization
    # @return [Array<Integer>] per-bucket word counts, +dimensions+ long
    def build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
      # normalize_words already downcases, so words need no further casing.
      normalize_words(data).split(separator).each_with_object([0] * dimensions) do |word, result|
        result[word_bucket(word, dimensions)] += 1
      end
    end

    # Builds a 0/1 membership vector of +subset+ relative to +full_set+.
    #
    # @param subset [Enumerable] values to mark
    # @param full_set [Array] reference list; the position of each value in
    #   this list is the position of its flag in the result
    # @return [Array<Integer>] vector of +full_set.size+ flags; values of
    #   +subset+ that are not found in +full_set+ are ignored
    def build_boolean_vector(subset, full_set)
      subset.each_with_object([0] * full_set.size) do |value, result|
        index = full_set.index(value)
        result[index] = 1 if index
      end
    end

    # Maps a truthy/falsy statement to 1 or 0.
    #
    # @param statement [Object] any value
    # @return [Integer] 1 when +statement+ is truthy, 0 otherwise
    def build_fuzzy_vector(statement)
      statement ? 1 : 0
    end

    # Builds a histogram of +values+ over logarithmic (base 1.5) buckets.
    #
    # Each non-blank value is converted with +to_f+ and counted in bucket
    # floor(log base 1.5 of the value), limited to 0..dimensions - 1.
    #
    # @param values [Enumerable] numeric-like values; blank entries are skipped
    # @param dimensions [Integer] number of buckets
    # @return [Array<Integer>] per-bucket counts, +dimensions+ long
    def build_exponential_vector(values, dimensions:)
      values.reject(&:blank?).each_with_object([0] * dimensions) do |value, result|
        number = value.to_f
        # Math.log raises Math::DomainError for negative input and NaN cannot
        # be floored, so anything that is not strictly positive goes straight
        # to the first bucket, like values below 1 do.
        index = number.positive? ? Math.log(number, 1.5) : 0
        index = 0 if index.negative?
        index = dimensions - 1 if index > dimensions - 1

        result[index.floor] += 1
      end
    end

    # Encodes an optional value as a pair of the value and a presence flag.
    #
    # @param value [Object, nil] value that may be blank
    # @return [Array] +[value, 1]+ when the value is present, +[0, 0]+ otherwise
    def build_maybe_nil_vector(value)
      [value.presence || 0, value.present? ? 1 : 0]
    end

    private

    # Bucket index of +word+: the last four bytes of its SHA-2 digest read as
    # a big-endian unsigned 32-bit integer, modulo +dimensions+.
    def word_bucket(word, dimensions)
      Digest::SHA2.digest(word).byteslice(-4, 4).unpack('N1').first % dimensions
    end

    # Strips HTML tags, replaces every run of non-alphanumeric characters with
    # a single space and downcases the result.
    def normalize_words(data)
      strip_tags(data || '').gsub(/[^[:alpha:][:digit:]]+/, ' ').downcase
    end

    # Delegates tag stripping to ActionController's view helper.
    def strip_tags(html)
      ActionController::Base.helpers.strip_tags(html)
    end
  end
end
@@ -0,0 +1,3 @@
module HashingTrickMl
  # Version of the gem; frozen so the shared constant cannot be mutated.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,4 @@
# Load the version constant as well, so HashingTrickMl::VERSION is defined
# after a plain `require 'hashing_trick_ml'`.
require 'hashing_trick_ml/version'
require 'hashing_trick_ml/acts_as_vectorized'

# Top-level namespace of the hashing_trick_ml gem.
module HashingTrickMl
end
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :hashing_trick_ml do
3
+ # # Task goes here
4
+ # end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hashing_trick_ml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Mohammed Gharbi
8
+ - Ivan Fomichev
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2017-11-23 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Hashing Trick ML for vectorizing features.
15
+ email:
16
+ - mohammed.gharbi@eventinc.de
17
+ - ivan.fomichev@eventinc.de
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - MIT-LICENSE
23
+ - README.md
24
+ - Rakefile
25
+ - lib/hashing_trick_ml.rb
26
+ - lib/hashing_trick_ml/acts_as_vectorized.rb
27
+ - lib/hashing_trick_ml/version.rb
28
+ - lib/tasks/hashing_trick_ml_tasks.rake
29
+ homepage: https://www.eventinc.de/
30
+ licenses:
31
+ - MIT
32
+ metadata: {}
33
+ post_install_message:
34
+ rdoc_options: []
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ requirements: []
48
+ rubyforge_project:
49
+ rubygems_version: 2.6.13
50
+ signing_key:
51
+ specification_version: 4
52
+ summary: Hashing Trick Machine Learning.
53
+ test_files: []