yannitor 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 86f4b0ff2fca0aaa5270a85df9fcf5a802154eed
4
- data.tar.gz: 55d3f801ad79c800177ca453e08be692eedaf249
2
+ SHA256:
3
+ metadata.gz: 3aa7e3ec52c46173c2b250128ae91232b56f5f77128b96076e03a4bb754d7ebc
4
+ data.tar.gz: 96ba16f09a7d85186d97228758f4a2ff91f996f45204f05f82b6fb1c1b741c4f
5
5
  SHA512:
6
- metadata.gz: b636f749e8a5bae71eb3cbf953925522279ea8272ee6a1b1b63620617743d878d7202f5182d5486debe33034055f6c544fdbb15e47bbf471be8455eac4ccf998
7
- data.tar.gz: 7ac6c13650a2463901647d809773060206e4c6c684f6a731eece17f95ce34acb34d52bfeaecb15e73a81052aee685bf4cc81c11eca8e42e519acab2e5e87b23f
6
+ metadata.gz: f786eaffca03c775ab53f0931ce45ca1c66896132e9a3d052547d4a71711d32ef044378370b077b4bdfcbeea79f939ee002ff18f8fe821367113de0aa694f7d8
7
+ data.tar.gz: f87a486ac25649dab0f2e6ffb10e3030061d6e6d96c12e938fa4e2efb0e9f43b2b12720dcb29ccf5befc26cb78d28b5bbe135e60a430eeb84dac970e7f552208
@@ -1,59 +1,64 @@
1
- require 'active_record'
2
- # require 'active_record/version'
3
- # require 'active_support/core_ext/module'
1
+ # frozen_string_literal: true
4
2
 
5
- # begin
6
- # require 'rails/engine'
7
- # end
3
+ require 'active_record'
8
4
 
9
5
  module Yannitor
10
6
  module Broom
11
- attr_accessor :features
7
+ attr_accessor :yannitor_features
12
8
 
13
9
  def yannitor_is_cleaning(feats = {})
14
- self.features = feats
10
+ self.yannitor_features = feats
15
11
  end
16
12
 
17
- def to_one_hot target_column, type = 'text'
18
- sorted_value_array = self.pluck("distinct(#{target_column})")
19
-
20
- _table_name = self.table_name
21
- values_select = %Q(SELECT value FROM (values ('#{ sorted_value_array.join("'), ('") }')) s(value))
22
-
23
- self.select(%Q(
24
- #{_table_name}.id,
25
- ARRAY_AGG(CASE WHEN sorted_value_table.value::#{type} = #{_table_name}.#{target_column}::#{type} THEN 1 ELSE 0 END) AS o#{target_column}
26
- )).joins(%Q(
13
+ def to_one_hot(target_column, type = 'text')
14
+ sorted_value_array = pluck("distinct(#{target_column})").join("'), ('")
15
+
16
+ table_name = self.table_name
17
+ values_select = %(
18
+ SELECT value FROM (values ('#{sorted_value_array}')) s(value)
19
+ )
20
+
21
+ self.select(%(
22
+ #{table_name}.id,
23
+ ARRAY_AGG(CASE
24
+ WHEN sorted_value_table.value::#{type} = #{table_name}.#{target_column}::#{type}
25
+ THEN 1
26
+ ELSE 0
27
+ END
28
+ ) AS o#{target_column}
29
+ )).joins(%(
27
30
  LEFT JOIN (#{values_select}) AS sorted_value_table ON 1=1
28
- )).group("#{_table_name}.id")
31
+ )).group("#{table_name}.id")
29
32
  end
30
33
 
31
34
  def vectorize
32
- _table_name = self.table_name
33
-
34
- select('*, ' + features[:linear].map do |feature|
35
- min = all.minimum(feature)
36
- max = all.maximum(feature)
37
- "CAST((#{_table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
38
- end.join(', ')).all.map do |obj|
35
+ select('*, ' + linear_feature_select).build_linear_features
36
+ end
39
37
 
40
- obj.class.features[:linear].map do |feature|
38
+ def build_linear_features
39
+ all.map do |obj|
40
+ obj.class.yannitor_features[:linear].map do |feature|
41
41
  obj.send("n#{feature}").to_f
42
42
  end
43
43
  end
44
44
  end
45
45
 
46
- def nelect(feature)
47
- _table_name = self.table_name
46
+ def linear_feature_select
47
+ yannitor_features[:linear].map do |feature|
48
+ min = all.minimum(feature)
49
+ max = all.maximum(feature)
50
+ "CAST((#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float) AS float) as n#{feature}"
51
+ end.join(', ')
52
+ end
48
53
 
54
+ def nelect(feature)
49
55
  min = all.minimum(feature)
50
56
  max = all.maximum(feature)
51
-
52
- self.select("*, (#{_table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)::float as n#{feature}")
57
+
58
+ select("*, (#{table_name}.#{feature}::float - #{min}::float) / (#{max}::float - #{min}::float)::float as n#{feature}")
53
59
  end
54
60
 
55
61
  def normalize(feature)
56
- print "Normalizing #{feature}"
57
62
  min = all.minimum(feature)
58
63
  max = all.maximum(feature)
59
64
  data = all.nelect(feature).map do |e|
@@ -63,14 +68,13 @@ module Yannitor
63
68
  [data, min, max]
64
69
  end
65
70
 
66
- def to_file
67
- CSV.open("data.csv", "wb", {col_sep: ' '}) do |csv|
71
+ def to_file(file_name = 'data.csv', separator = ' ')
72
+ CSV.open(file_name, 'wb', col_sep: separator) do |csv|
68
73
  all.vectorize.each { |v| csv << v }
69
74
  end
70
75
 
71
76
  nil
72
77
  end
73
-
74
78
  end
75
79
  end
76
80
 
@@ -1,3 +1,3 @@
1
1
  module Yannitor
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/yannitor.rb CHANGED
@@ -1,6 +1,7 @@
1
- require "yannitor/version"
2
- require "yannitor/cleaner"
1
+ # frozen_string_literal: true
2
+
3
+ require 'yannitor/version'
4
+ require 'yannitor/cleaner'
3
5
 
4
6
  module Yannitor
5
- # Your code goes here...
6
7
  end
data/yannitor.gemspec CHANGED
@@ -7,21 +7,12 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "yannitor"
8
8
  spec.version = Yannitor::VERSION
9
9
  spec.authors = ["Danielius Visockas"]
10
- spec.email = ["danielius@wisemonks.com"]
10
+ spec.email = ["danieliusvisockas@gmail.com"]
11
11
 
12
- spec.summary = %q{Gem for preprocessing Active record collections}
12
+ spec.summary = %q{Helps you build one-hot or min-max encoded vectors from ActiveRecord collections}
13
13
  spec.description = %q{I'll clean your data}
14
14
  spec.homepage = "https://github.com"
15
15
 
16
- # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
17
- # to allow pushing to a single host or delete this section to allow pushing to any host.
18
- # if spec.respond_to?(:metadata)
19
- # spec.metadata['allowed_push_host'] = "'http://mygemserver.com'"
20
- # else
21
- # raise "RubyGems 2.0 or newer is required to protect against " \
22
- # "public gem pushes."
23
- # end
24
-
25
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
17
  f.match(%r{^(test|spec|features)/})
27
18
  end
@@ -29,6 +20,7 @@ Gem::Specification.new do |spec|
29
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
21
  spec.require_paths = ["lib"]
31
22
 
23
+ spec.add_runtime_dependency "active_record", ["> 3.2.0"]
32
24
  spec.add_development_dependency "bundler", "~> 1.14"
33
25
  spec.add_development_dependency "rake", "~> 10.0"
34
26
  spec.add_development_dependency "rspec", "~> 3.0"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yannitor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danielius Visockas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-11 00:00:00.000000000 Z
11
+ date: 2019-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: active_record
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -54,7 +68,7 @@ dependencies:
54
68
  version: '3.0'
55
69
  description: I'll clean your data
56
70
  email:
57
- - danielius@wisemonks.com
71
+ - danieliusvisockas@gmail.com
58
72
  executables: []
59
73
  extensions: []
60
74
  extra_rdoc_files: []
@@ -91,8 +105,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
105
  version: '0'
92
106
  requirements: []
93
107
  rubyforge_project:
94
- rubygems_version: 2.6.14
108
+ rubygems_version: 2.7.8
95
109
  signing_key:
96
110
  specification_version: 4
97
- summary: Gem for preprocessing Active record collections
111
+ summary: Helps you build one-hot or min-max encoded vectors from ActiveRecord collections
98
112
  test_files: []