cwninja-has_related 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown ADDED
@@ -0,0 +1,38 @@
1
+ HasRelated is used by Reevoo to generate similar-product recommendations.
2
+
3
+ Example Index Builder:
4
+
5
+ namespace :apache do
6
+ desc "Parse the last apache logs for similar products"
7
+ task :find_similar_products => :environment do
8
+ log_files = ENV["LOG_FILES"] || "/var/log/httpd/access.*.gz"
9
+
10
+ prefs = Hash.new{|h,k| h[k] = Hash.new(0) }
11
+
12
+ Dir.glob(log_files) do |filename|
13
+ File.open(filename) do |f|
14
+ Zlib::GzipReader.new(f).each_line do |line|
15
+ product_id, ip = parse_log_line(line)
16
+ if product_id and ip
17
+ prefs[product_id][ip] += 1
18
+ end
19
+ end
20
+ end
21
+ end
22
+
23
+ HasRelated.dump_dataset(prefs, "Product")
24
+ end
25
+ end
26
+
27
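+ (You will need to define your own `parse_log_line` method. It should return a `[product_id, ip]` pair for lines that record a product view; lines for which either value is `nil` are skipped.)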
28
+
29
+
30
+ Example Usage:
31
+
32
+ class Product < ActiveRecord::Base
33
+ has_related "related_products"
34
+ end
35
+
36
+ most_related_product = Product.first.related_products(1).first
37
+
38
+ Happy hunting.
@@ -0,0 +1,110 @@
1
+ require 'fileutils'
2
# HasRelated computes "items similar to this one" from preference data and
# exposes the result on models through the +has_related+ class macro.
#
# Preference data ("prefs") is a Hash of the form
#   { item_id => { person => weight, ... }, ... }
# A generated dataset maps each item id to a ranked Array of
# [similarity, other_item_id] pairs, best match first. Datasets are written
# with Marshal to db/similar_items_datasets/<class_name>.bin under Rails.root
# and cached in memory per class name once read.
module HasRelated
  # Maximum number of ranked neighbours stored for each item.
  MAX_RELATED_ITEMS = 16

  # Hook run by +include+: makes the +has_related+ macro available on +base+.
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Defines an instance method called +method_name+ that returns the
    # records most similar to the receiver.
    #
    # The generated method takes an optional +count+ limiting how many
    # records are returned; without it every stored neighbour is returned.
    def has_related(method_name = "related_items")
      # *args (rather than a default block argument) keeps the optional
      # count working on Rubies without default block parameters.
      define_method(method_name) do |*args|
        if args.size > 1
          raise ArgumentError, "wrong number of arguments (#{args.size} for 0..1)"
        end
        HasRelated.similar_items(self, args.first)
      end
    end
  end

  class << self

    # Path of the dataset file for +klass+ (a class or class name).
    def file_for_class(klass)
      # to_s so that a Pathname root is accepted as well as a String.
      File.join(Rails.root.to_s, "db", "similar_items_datasets", klass.to_s.underscore + ".bin")
    end

    # Returns the records of item's class that are most similar to +item+,
    # best match first. +count+ limits the number of results; nil means all
    # stored neighbours.
    def similar_items(item, count = nil)
      ids = similar_item_ids(item, count)
      return [] if ids.empty? # nothing known: skip the database round trip

      model = item.class
      # Prefer the query interface when the model has one; fall back to the
      # dynamic finder for ActiveRecord versions that predate +where+.
      records = if model.respond_to?(:where)
        model.where(:id => ids).to_a
      else
        model.find_all_by_id(ids) || []
      end

      # The lookup does not preserve ranking order, so restore it here.
      records.sort_by { |record| ids.index(record.id) }
    end

    # Returns the ids of the items most similar to +item+, best match first,
    # limited to +count+ entries when +count+ is given.
    def similar_item_ids(item, count = nil)
      rankings = similar_items_dataset(item.class.name)[item.id] || []
      rankings = rankings.first(count) if count
      rankings.map { |_score, id| id }
    end

    # Similarity between two items: the sum over +people+ (those who rated
    # both) of the product of their two weights, divided by the number of
    # +all_people+ (those who rated either). Returns a Float.
    #
    # +item1+ and +item2+ are accepted for interface compatibility and are
    # not used in the calculation.
    def similarity(item1, prefs_item1, item2, prefs_item2, people, all_people)
      return 0.0 if all_people.empty? # avoid 0 / 0.0 => NaN

      overlap = people.inject(0) { |acc, person|
        acc + prefs_item2[person] * prefs_item1[person]
      }
      overlap / all_people.size.to_f
    end

    # Builds the similarity dataset for +prefs+.
    #
    # Returns a Hash mapping each item that has at least one neighbour with
    # a positive similarity to its top MAX_RELATED_ITEMS
    # [similarity, other_item] pairs, best first. Equal scores keep the
    # iteration order of +prefs+.
    #
    # If a block is given it is yielded [item, pairs] for every item, where
    # +pairs+ is the complete, unsorted list of positive-scoring neighbours.
    def generate_dataset(prefs, &block)
      # Collect each item's people once instead of once per pair of items.
      people_by_item = {}
      prefs.each { |item, item_prefs| people_by_item[item] = item_prefs.keys }

      all_results = {}

      prefs.each do |item1, item1_prefs|
        item1_people = people_by_item[item1]
        neighbours = []

        prefs.each do |item2, item2_prefs|
          next if item1 == item2

          item2_people = people_by_item[item2]
          common_people = item1_people & item2_people
          all_people = item1_people | item2_people

          score = similarity(item1, item1_prefs, item2, item2_prefs, common_people, all_people)
          neighbours << [score, item2] if score > 0
        end

        all_results[item1] = top_ranked(neighbours) if neighbours.any?

        yield [item1, neighbours] if block_given?
      end

      all_results
    end

    # Generates the dataset for +prefs+ and writes it to the file for +klass+.
    # The optional block is passed through to generate_dataset.
    def dump_dataset(prefs, klass, &block)
      ensure_data_dir_exists!(klass)
      dataset = generate_dataset(prefs, &block)
      write_dataset_to_disk(dataset, klass)
    end

    # Like dump_dataset, but +grouped_prefs+ maps a group key to a prefs
    # Hash. A dataset is generated per group and the results are merged into
    # one file; on duplicate item ids the later group wins.
    def dump_grouped_datasets(grouped_prefs, klass, &block)
      ensure_data_dir_exists!(klass)
      dataset = {}
      grouped_prefs.each do |_group, prefs|
        dataset.merge! generate_dataset(prefs, &block)
      end
      write_dataset_to_disk(dataset, klass)
    end

    # Returns the dataset for +klass_name+, loading it from disk on first use
    # and caching it afterwards. A missing or unreadable file yields an empty
    # Hash, which is cached as well.
    def similar_items_dataset(klass_name)
      @similar_items_dataset ||= {}
      key = klass_name.to_s
      return @similar_items_dataset[key] if @similar_items_dataset[key]

      path = file_for_class(key)
      @similar_items_dataset[key] = if File.readable?(path)
        # Binary mode, and the block form closes the handle after loading.
        # NOTE: Marshal.load can instantiate arbitrary objects; the dataset
        # file must only ever be one written by this module.
        File.open(path, "rb") { |io| Marshal.load(io) }
      else
        {}
      end
    end

    private

    # Sorts [score, item] pairs by descending score, breaking ties by
    # original position, and keeps the first MAX_RELATED_ITEMS.
    def top_ranked(neighbours)
      ordered = neighbours.each_with_index.sort_by { |(score, _item), index| [-score, index] }
      ordered.first(MAX_RELATED_ITEMS).map { |pair, _index| pair }
    end

    # Creates the directory that holds the dataset file for +klass+.
    def ensure_data_dir_exists!(klass)
      FileUtils.mkdir_p(File.dirname(file_for_class(klass)))
    end

    # Marshals +dataset+ into the file for +klass+.
    def write_dataset_to_disk(dataset, klass)
      # Forget any cached copy so the next read picks up the new file.
      @similar_items_dataset.delete(klass.to_s) if @similar_items_dataset

      File.open(file_for_class(klass), "wb") do |io|
        Marshal.dump(dataset, io)
      end
    end
  end
end
data/rails/init.rb ADDED
@@ -0,0 +1,5 @@
1
# Rails plugin entry point: load the library, then mix HasRelated into
# ActiveRecord::Base when ActiveRecord has been loaded.
has_related_lib = File.dirname(__FILE__) + '/../lib/has_related'
require has_related_lib

ActiveRecord::Base.send(:include, HasRelated) if defined?(ActiveRecord)
5
+
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Checks the rankings HasRelated.generate_dataset produces for a small,
# hand-built set of product/user preferences.
class DatasetGenerationTest < Test::Unit::TestCase
  # Generates the dataset once per test from five products and five users.
  def setup
    views = {}
    views[:p1] = {:u1 => 1, :u2 => 1, :u3 => 1}
    views[:p2] = {:u2 => 1}
    views[:p3] = {:u3 => 1, :u5 => 1}
    views[:p4] = {:u1 => 1, :u4 => 1}
    views[:p5] = {:u1 => 1, :u4 => 1, :u5 => 1}
    @rv = HasRelated.generate_dataset(views)
  end

  # :p1's best match must be one of the products sharing a single user with it.
  def test_tied_game
    best_match = related_to(:p1).first
    assert [:p2, :p3, :p4].include?(best_match)
  end

  # :p2's only user also viewed :p1, so :p1 ranks first.
  def test_one_of_many
    assert_equal :p1, related_to(:p2).first
  end

  # :p5 shares two users with :p4, putting it ahead of :p3.
  def test_clear_winner
    first_match, second_match = related_to(:p5)
    assert_equal :p4, first_match
    assert_equal :p3, second_match
  end

  # :p3 and :p4 have no users in common, so :p4 is absent from :p3's list.
  def test_no_relation
    assert !related_to(:p3).include?(:p4)
  end

  private

  # Ids of the items ranked for +item+, best first.
  def related_to(item)
    @rv[item].map { |pair| pair.last }
  end
end
@@ -0,0 +1,22 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+ require 'rubygems'
3
+ require 'test/unit'
4
+ require 'fileutils'
5
+
6
+ class Test::Unit::TestCase #:nodoc:
7
+ end
8
+
9
# Dir.tmpdir lives in the 'tmpdir' standard library.
require 'tmpdir'

# Minimal stand-in for the Rails module, providing only Rails.root so the
# library can build its dataset paths during tests.
module Rails
  # Returns a scratch directory named "has_related" inside the system
  # temporary directory, creating it on first use. The path is memoized and
  # the directory is removed when the registered finalizer runs.
  def root
    if @tmpdir.nil?
      # Dir.tmpdir works even when the TMPDIR environment variable is unset,
      # where File.join(ENV["TMPDIR"], ...) would raise a TypeError.
      @tmpdir = File.join(Dir.tmpdir, "has_related")
      FileUtils.mkdir_p(@tmpdir)
      # A finalizer (rather than at_exit) so cleanup happens after the test
      # runner's own at_exit hook has finished running the tests.
      ObjectSpace.define_finalizer(@tmpdir, proc { |_id| FileUtils.remove_entry_secure @tmpdir })
    end
    @tmpdir
  end

  extend self
end
21
+
22
# Load the plugin through its Rails entry point, which the gem ships as
# rails/init.rb (there is no init.rb at the gem root).
require "#{File.dirname(__FILE__)}/../rails/init"
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cwninja-has_related
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Tom Lea
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-18 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: contrib@tomlea.co.uk
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.markdown
24
+ files:
25
+ - README.markdown
26
+ - test/dataset_generation_test.rb
27
+ - test/test_helper.rb
28
+ - lib/has_related.rb
29
+ - rails/init.rb
30
+ has_rdoc: true
31
+ homepage: http://tomlea.co.uk
32
+ post_install_message:
33
+ rdoc_options:
34
+ - --main
35
+ - README.markdown
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project: has_related
53
+ rubygems_version: 1.2.0
54
+ signing_key:
55
+ specification_version: 2
56
+ summary: Finds similar items based on user demand.
57
+ test_files: []
58
+