jruby_mahout 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ .bundle/
2
+ .idea/
3
+ .DS_Store
4
+ log/*.log
5
+ pkg/
6
+ .rbenv-version
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Fetch gems over TLS; the plain-http source is open to tampering in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,27 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ jruby_mahout (0.2.0)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rake (10.0.1)
11
+ rspec (2.12.0)
12
+ rspec-core (~> 2.12.0)
13
+ rspec-expectations (~> 2.12.0)
14
+ rspec-mocks (~> 2.12.0)
15
+ rspec-core (2.12.0)
16
+ rspec-expectations (2.12.0)
17
+ diff-lcs (~> 1.1.3)
18
+ rspec-mocks (2.12.0)
19
+
20
+ PLATFORMS
21
+ java
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ jruby_mahout!
26
+ rake
27
+ rspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2012 Vasily Vasinov
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # Jruby Mahout
2
+ Jruby Mahout is a gem that unleashes the power of Apache Mahout in the world of Jruby. Mahout is a superior machine learning library written in Java. It deals with recommendations, clustering and classification machine learning problems at scale. Until now it was difficult to use it in Ruby projects. You'd have to implement Java interfaces in Jruby yourself, which is not quick especially if you just started exploring the world of machine learning.
3
+
4
+ The goal of this library is to make machine learning at scale in Jruby projects simple.
5
+
6
+ ## Quick Overview
7
+ This is an early version of a Jruby gem that only supports Mahout recommendations. It also includes a simple Postgres manager that can be used to manage the appropriate recommendation tables. Unfortunately, it's impractical to use ActiveRecord (AR) with Mahout, because AR operates at a much higher level and adds overhead that becomes critical when dealing with millions of records in real time.
8
+
9
+ ## Get Mahout
10
+ First of all, you need to download the Mahout library from one of the [mirrors](http://www.apache.org/dyn/closer.cgi/mahout/). Jruby Mahout only supports Mahout 0.7 at this point.
11
+
12
+ ## Get Postgres JDBC Adapter
13
+ If you wish to work with a database for recommendations, you'll have to install [JDBC driver for Postgres](http://jdbc.postgresql.org/download.html). Another option is to use file-based recommendation.
14
+
15
+ ## Installation
16
+ ### 1. Set environment variable MAHOUT_DIR to point at your Mahout installation.
17
+ ### 2. Add the gem to your `Gemfile`
18
+ ```ruby
19
+ platform :jruby do
20
+ gem "jruby_mahout"
21
+ end
22
+ ```
23
+ And run `bundle install`.
24
+
25
+ ## Contribute
26
+ - Fork the project.
27
+ - Write code for a feature or bug fix.
28
+ - Add Rspec tests for it.
29
+ - Commit, do not make changes to rakefile or version.
30
+ - Submit a pull request.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Defines the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new('spec')

# A bare `rake` invocation runs the specs.
task default: :spec
@@ -0,0 +1,22 @@
1
# Gem specification for jruby_mahout.
#
# The version constant lives in lib/, so lib/ is put on the load path before
# it is required.
$LOAD_PATH << File.expand_path("../lib", __FILE__)
require "jruby_mahout/version"

Gem::Specification.new do |gem|
  gem.name          = "jruby_mahout"
  gem.version       = JrubyMahout::VERSION
  gem.authors       = ["Vasily Vasinov"]
  gem.email         = ["vasinov@me.com"]
  gem.homepage      = "https://github.com/vasinov/jruby_mahout"
  gem.summary       = "Jruby Mahout is a gem that unleashes the power of Apache Mahout in the world of Jruby."
  gem.description   = "Jruby Mahout is a gem that unleashes the power of Apache Mahout in the world of Jruby. Mahout is a superior machine learning library written in Java. It deals with recommendations, clustering and classification machine learning problems at scale. Until now it was difficult to use it in Ruby projects. You'd have to implement Java interfaces in Jruby yourself, which is not quick especially if you just started exploring the world of machine learning."
  gem.license       = "MIT"

  # Package every file tracked by git. A previous `gem.files = Dir[...]`
  # assignment was always overwritten by this one and has been dropped, so the
  # packaged file list is unchanged.
  gem.files         = `git ls-files`.split($/)
  gem.test_files    = Dir["spec/**/*"]
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,8 @@
1
# Entry point of the gem: loads the Java bridge, the Mahout 0.7 jars found
# under ENV["MAHOUT_DIR"], and then every Ruby file of the library.
module JrubyMahout
  require 'java'

  # Fail with an explicit message instead of the TypeError that
  # File.join(nil, ...) would raise when the variable is not set.
  mahout_dir = ENV.fetch("MAHOUT_DIR") do
    raise KeyError, "MAHOUT_DIR is not set; point it at your Mahout 0.7 installation"
  end

  %w[mahout-core-0.7.jar mahout-integration-0.7.jar mahout-math-0.7.jar].each do |jar|
    require File.join(mahout_dir, jar)
  end

  # Third-party jars shipped with Mahout; sorted so the load order is stable.
  Dir.glob(File.join(mahout_dir, 'lib/*.jar')).sort.each { |jar| require jar }

  # Resolve the library files relative to this file rather than the current
  # working directory, so the gem also loads when required from another
  # project. NOTE(review): assumes this file is lib/jruby_mahout.rb and its
  # siblings live in lib/jruby_mahout/ - confirm against the gem layout.
  Dir.glob(File.expand_path('../jruby_mahout/*.rb', __FILE__)).sort.each { |file| require file }
end
@@ -0,0 +1,20 @@
1
module JrubyMahout
  # Builds and holds the data model handed to a recommender.
  class DataModel
    attr_accessor :data_model

    # data_model_type - "file" or "postgres"; "mysql" and any other value
    #                   leave the data model nil.
    # params          - Hash of options; :file_path is read for "file", and
    #                   the whole hash is passed to PostgresManager for
    #                   "postgres".
    def initialize(data_model_type, params)
      @data_model =
        case data_model_type
        when "file"
          FileDataModel.new(java.io.File.new(params[:file_path]))
        when "postgres"
          PostgresManager.new(params).setup_data_model(params)
        when "mysql"
          # TODO: implement
          nil
        end
    end
  end
end
20
+
@@ -0,0 +1,18 @@
1
module JrubyMahout
  # Scores a recommender builder against a data model using Mahout's
  # AverageAbsoluteDifferenceRecommenderEvaluator.
  class Evaluator
    def initialize(data_model, recommender_builder)
      @mahout_evaluator = AverageAbsoluteDifferenceRecommenderEvaluator.new
      @recommender_builder = recommender_builder
      @data_model = data_model
    end

    # Returns the evaluation score as a Float, or nil when the builder is
    # configured for an item-based recommender that it does not allow.
    def evaluate(training_percentage, evaluation_percentage)
      item_based = @recommender_builder.recommender_name == "GenericItemBasedRecommender"
      return nil if item_based && !@recommender_builder.item_based_allowed

      score = @mahout_evaluator.evaluate(@recommender_builder,
                                         nil,
                                         @data_model,
                                         training_percentage,
                                         evaluation_percentage)
      Float(score)
    end
  end
end
18
+
@@ -0,0 +1,34 @@
1
+ # Builder interface and similarity measures
2
+ java_import org.apache.mahout.cf.taste.eval.RecommenderBuilder
3
+ java_import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity
4
+ java_import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity
5
+ java_import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity
6
+ java_import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity
7
+ java_import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity
8
+ java_import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity
9
+
10
+ # Neighborhoods
11
+ java_import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood
12
+
13
+ # Recommenders
14
+ java_import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender
15
+ java_import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender
16
+ java_import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender
17
+
18
+ # Weighting
19
+ java_import org.apache.mahout.cf.taste.common.Weighting
20
+
21
+ # Evaluators
22
+ java_import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator
23
+
24
+ # Data Models
25
+ java_import org.apache.mahout.cf.taste.impl.model.jdbc.PostgreSQLJDBCDataModel
26
+ java_import org.apache.mahout.cf.taste.impl.model.file.FileDataModel
27
+
28
+
29
+ # Postgres
30
+ begin
31
+ java_import org.postgresql.ds.PGPoolingDataSource
32
+ rescue Exception => e
33
+ puts e
34
+ end
@@ -0,0 +1,5 @@
1
module JrubyMahout
  # Placeholder for a MySQL-backed manager; it defines no behaviour yet.
  class MysqlManager
    # TODO: implement
  end
end
@@ -0,0 +1,76 @@
1
module JrubyMahout
  # Manages a pooled PostgreSQL data source, the Mahout JDBC data model built
  # on top of it, and the preference table that backs the model.
  #
  # Database errors are printed and swallowed (methods then return nil), as
  # before. Invalid table names or non-numeric record values raise
  # ArgumentError before any SQL is sent, because they are interpolated into
  # the statement text.
  class PostgresManager
    # Plain (optionally schema-qualified) SQL identifier. Table names cannot be
    # bound as parameters, so they are checked against this pattern instead.
    TABLE_NAME_PATTERN = /\A[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?\z/

    attr_accessor :data_model, :data_source, :statement

    # params - Hash with :username, :password, :host, :port and :db_name.
    def initialize(params)
      @data_source = PGPoolingDataSource.new
      @data_source.setUser(params[:username])
      @data_source.setPassword(params[:password])
      @data_source.setServerName(params[:host])
      @data_source.setPortNumber(params[:port])
      @data_source.setDatabaseName(params[:db_name])
    end

    # Builds the Mahout data model over params[:table_name].
    # Returns the data model, or nil when it could not be created.
    def setup_data_model(params)
      @data_model = PostgreSQLJDBCDataModel.new(@data_source, params[:table_name], "user_id", "item_id", "rating", "created")
    rescue StandardError, java.lang.Exception => e
      puts e
    end

    # Opens a connection and a statement used by the table/record helpers.
    # A statement and connection left over from an earlier call are closed
    # first so repeated calls do not leak pooled connections.
    # Returns the new statement, or nil on failure.
    def create_statement
      release_statement
      @connection = @data_source.getConnection
      @statement = @connection.createStatement
    rescue StandardError, java.lang.Exception => e
      puts e
    end

    # Closes the current statement and connection, then the data source.
    def close_data_source
      release_statement
      @data_source.close
    rescue StandardError, java.lang.Exception => e
      puts e
    end

    # Updates the rating of (user_id, item_id) in table +name+, inserting the
    # row when it does not exist yet.
    #
    # record - Hash with :user_id, :item_id (integers) and :rating (numeric).
    # name   - table name (plain SQL identifier).
    #
    # Raises ArgumentError when the name or a value is not of the expected form.
    def upsert_record(record, name)
      # Validation happens before the SQL is assembled; the coerced numbers and
      # the checked identifier cannot carry SQL fragments.
      table = checked_table_name(name)
      user_id = checked_integer(record[:user_id], :user_id)
      item_id = checked_integer(record[:item_id], :item_id)
      rating = checked_number(record[:rating], :rating)

      begin
        @statement.execute("UPDATE #{table} SET user_id=#{user_id}, item_id=#{item_id}, rating=#{rating} WHERE user_id=#{user_id} AND item_id=#{item_id};")
        @statement.execute("INSERT INTO #{table} (user_id, item_id, rating) SELECT #{user_id}, #{item_id}, #{rating} WHERE NOT EXISTS (SELECT 1 FROM #{table} WHERE user_id=#{user_id} AND item_id=#{item_id});")
      rescue java.sql.SQLException => e
        puts e
      end
    end

    # Creates the preference table +name+ and its user/item indexes.
    # Raises ArgumentError when +name+ is not a plain SQL identifier.
    def create_table(name)
      table = checked_table_name(name)

      begin
        @statement.executeUpdate("
          CREATE TABLE #{table} (
            user_id BIGINT NOT NULL,
            item_id BIGINT NOT NULL,
            rating int NOT NULL,
            created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (user_id, item_id)
          );
        ")
        @statement.executeUpdate("CREATE INDEX #{table}_user_id_index ON #{table} (user_id);")
        @statement.executeUpdate("CREATE INDEX #{table}_item_id_index ON #{table} (item_id);")
      rescue java.sql.SQLException => e
        puts e
      end
    end

    # Drops the indexes and the table created by #create_table, if present.
    # Raises ArgumentError when +name+ is not a plain SQL identifier.
    def delete_table(name)
      table = checked_table_name(name)

      begin
        @statement.executeUpdate("DROP INDEX IF EXISTS #{table}_user_id_index;")
        @statement.executeUpdate("DROP INDEX IF EXISTS #{table}_item_id_index;")
        @statement.executeUpdate("DROP TABLE IF EXISTS #{table};")
      rescue java.sql.SQLException => e
        puts e
      end
    end

    private

    # Best-effort close of the current statement and its connection.
    def release_statement
      [@statement, @connection].compact.each do |resource|
        begin
          resource.close
        rescue StandardError, java.lang.Exception => e
          puts e
        end
      end
      @statement = nil
      @connection = nil
    end

    # Returns +name+ as a String when it is a plain SQL identifier.
    def checked_table_name(name)
      table = name.to_s
      return table if table =~ TABLE_NAME_PATTERN

      raise ArgumentError, "invalid table name: #{name.inspect}"
    end

    # Coerces +value+ to an Integer or raises ArgumentError.
    def checked_integer(value, field)
      Integer(value)
    rescue ArgumentError, TypeError, RangeError
      raise ArgumentError, "#{field} must be an integer, got #{value.inspect}"
    end

    # Returns +value+ as an Integer or finite Float, or raises ArgumentError.
    def checked_number(value, field)
      return value if value.is_a?(Integer)

      number = Float(value)
      raise ArgumentError, "#{field} must be a finite number, got #{value.inspect}" unless number.finite?

      number
    rescue TypeError
      raise ArgumentError, "#{field} must be a number, got #{value.inspect}"
    end
  end
end
@@ -0,0 +1,78 @@
1
module JrubyMahout
  # Facade over a Mahout recommender: it builds the recommender when a data
  # model is assigned and exposes recommendation, similarity and evaluation
  # helpers. Every query returns nil until a data model has been assigned.
  class Recommender
    ITEM_BASED = "GenericItemBasedRecommender"
    USER_BASED = "GenericUserBasedRecommender"

    attr_accessor :is_weighted, :neighborhood_size, :similarity_name, :recommender_name
    attr_reader :data_model

    def initialize(similarity_name, neighborhood_size, recommender_name, is_weighted)
      @is_weighted = is_weighted
      @neighborhood_size = neighborhood_size
      @similarity_name = similarity_name
      @recommender_name = recommender_name
      @recommender_builder = RecommenderBuilder.new(@similarity_name,
                                                    @neighborhood_size,
                                                    @recommender_name,
                                                    @is_weighted)
      @data_model = nil
      @recommender = nil
    end

    # Stores the data model and (re)builds the underlying recommender for it.
    def data_model=(data_model)
      @data_model = data_model
      @recommender = @recommender_builder.buildRecommender(@data_model)
    end

    # Returns an Array of [item_id, value rounded to 5 places] pairs, or nil
    # when no recommender has been built.
    def recommend(user_id, number_of_items, rescorer)
      return nil if @recommender.nil?

      recommendations_to_array(@recommender.recommend(user_id, number_of_items, rescorer))
    end

    # Evaluates the configured builder against the current data model.
    def evaluate(training_percentage, evaluation_percentage)
      Evaluator.new(@data_model, @recommender_builder).evaluate(training_percentage, evaluation_percentage)
    end

    # Items most similar to +item_id+. mostSimilarItems is only called on an
    # item-based recommender; any other recommender yields nil.
    def similar_items(item_id, number_of_items, rescorer)
      return nil unless item_based?

      @recommender.mostSimilarItems(item_id, number_of_items, rescorer)
    end

    # Users most similar to +user_id+. mostSimilarUserIDs is only called on a
    # user-based recommender; any other recommender yields nil.
    def similar_users(user_id, number_of_items, rescorer)
      return nil unless user_based?

      @recommender.mostSimilarUserIDs(user_id, number_of_items, rescorer)
    end

    # Estimated preference of +user_id+ for +item_id+, or nil when no
    # recommender has been built.
    def estimate_preference(user_id, item_id)
      return nil if @recommender.nil?

      @recommender.estimatePreference(user_id, item_id)
    end

    # Items that most influenced recommending +item_id+ to +user_id+.
    # recommendedBecause is only called on an item-based recommender; any
    # other recommender yields nil.
    def recommended_because(user_id, item_id, number_of_items)
      return nil unless item_based?

      @recommender.recommendedBecause(user_id, item_id, number_of_items)
    end

    private

    def item_based?
      !@recommender.nil? && @recommender_name == ITEM_BASED
    end

    def user_based?
      !@recommender.nil? && @recommender_name == USER_BASED
    end

    # Converts recommended items into [item_id, rounded value] pairs.
    def recommendations_to_array(recommendations)
      pairs = []
      recommendations.each do |recommendation|
        pairs << [recommendation.getItemID, recommendation.getValue.round(5)]
      end
      pairs
    end
  end
end
@@ -0,0 +1,56 @@
1
module JrubyMahout
  # Ruby counterpart of Mahout's RecommenderBuilder interface: a helper that
  # creates the recommender to be evaluated for a given data model.
  class RecommenderBuilder
    attr_accessor :recommender_name, :item_based_allowed

    def initialize(similarity_name, neighborhood_size, recommender_name, is_weighted)
      @similarity_name = similarity_name
      @neighborhood_size = neighborhood_size
      @recommender_name = recommender_name
      @is_weighted = is_weighted
      # Item-based recommendation is switched off for Spearman similarity.
      @item_based_allowed = @similarity_name != "SpearmanCorrelationSimilarity"
    end

    # buildRecommender(DataModel dataModel)
    # Builds the recommender for +data_model+. Returns nil for an unknown
    # recommender name (or a disallowed item-based one); when anything raises
    # along the way, the exception object itself is returned.
    def buildRecommender(data_model)
      similarity = similarity_for(data_model)
      neighborhood = neighborhood_for(similarity, data_model)
      recommender_for(data_model, neighborhood, similarity)
    rescue Exception => e
      e
    end

    private

    # Similarity instance selected by @similarity_name, or nil if unknown.
    def similarity_for(data_model)
      case @similarity_name
      when "PearsonCorrelationSimilarity"
        optionally_weighted(PearsonCorrelationSimilarity, data_model)
      when "EuclideanDistanceSimilarity"
        optionally_weighted(EuclideanDistanceSimilarity, data_model)
      when "SpearmanCorrelationSimilarity"
        SpearmanCorrelationSimilarity.new(data_model)
      when "LogLikelihoodSimilarity"
        LogLikelihoodSimilarity.new(data_model)
      when "TanimotoCoefficientSimilarity"
        TanimotoCoefficientSimilarity.new(data_model)
      when "GenericItemSimilarity"
        PearsonCorrelationSimilarity.new(data_model, Weighting::WEIGHTED)
      end
    end

    # Instantiates +similarity_class+, weighted when @is_weighted is set.
    def optionally_weighted(similarity_class, data_model)
      if @is_weighted
        similarity_class.new(data_model, Weighting::WEIGHTED)
      else
        similarity_class.new(data_model)
      end
    end

    # Nearest-N user neighborhood, or nil when no size was configured.
    def neighborhood_for(similarity, data_model)
      return nil if @neighborhood_size.nil?

      NearestNUserNeighborhood.new(Integer(@neighborhood_size), similarity, data_model)
    end

    # Recommender selected by @recommender_name, or nil if unknown.
    def recommender_for(data_model, neighborhood, similarity)
      case @recommender_name
      when "GenericUserBasedRecommender"
        GenericUserBasedRecommender.new(data_model, neighborhood, similarity)
      when "GenericItemBasedRecommender"
        @item_based_allowed ? GenericItemBasedRecommender.new(data_model, similarity) : nil
      when "SlopeOneRecommender"
        SlopeOneRecommender.new(data_model)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module JrubyMahout
  # Gem version, read by the gemspec. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = '0.2.0'.freeze
end