cwninja-has_related 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +38 -0
- data/lib/has_related.rb +110 -0
- data/rails/init.rb +5 -0
- data/test/dataset_generation_test.rb +32 -0
- data/test/test_helper.rb +22 -0
- metadata +58 -0
data/README.markdown
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
has_related is used by Reevoo to generate similar-product recommendations.
|
2
|
+
|
3
|
+
Example Index Builder:
|
4
|
+
|
5
|
+
namespace :apache do
|
6
|
+
desc "Parse the last apache logs for similar products"
|
7
|
+
task :find_similar_products => :environment do
|
8
|
+
log_files = ENV["LOG_FILES"] || "/var/log/httpd/access.*.gz"
|
9
|
+
|
10
|
+
prefs = Hash.new{|h,k| h[k] = Hash.new(0) }
|
11
|
+
|
12
|
+
Dir.glob(log_files) do |filename|
|
13
|
+
File.open(filename) do |f|
|
14
|
+
Zlib::GzipReader.new(f).each_line do |line|
|
15
|
+
product_id, ip = parse_log_line(line)
|
16
|
+
if product_id and ip
|
17
|
+
prefs[product_id][ip] += 1
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
HasRelated.dump_dataset(prefs, "Product")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
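(You will need to define your own `parse_log_line` method. It should return `[product_id, ip]` for a product page hit, or `nil` for lines that should be ignored.)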
|
28
|
+
|
29
|
+
|
30
|
+
Example Usage:
|
31
|
+
|
32
|
+
class Product < ActiveRecord::Base
|
33
|
+
has_related "related_products"
|
34
|
+
end
|
35
|
+
|
36
|
+
most_related_product = Product.first.related_products(1).first
|
37
|
+
|
38
|
+
Happy hunting.
|
data/lib/has_related.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
# HasRelated adds "related items" lookups to a model, backed by a precomputed
# item-to-item similarity dataset that is serialised to disk with Marshal.
#
# Workflow supported by this module:
#   1. Build a preference map shaped like { item_id => { person => weight } }.
#   2. Call HasRelated.dump_dataset(prefs, "Product") to rank and persist it.
#   3. Declare `has_related "related_products"` on the model and call
#      `record.related_products(5)`.
module HasRelated
  # Maximum number of ranked neighbours stored for each item.
  MAX_RELATED_ITEMS = 16

  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Defines an instance method (default name "related_items") that returns
    # the records most similar to the receiver.
    #
    # The generated method takes an optional count; when omitted, every stored
    # neighbour is returned. The splat keeps the block valid on old Rubies that
    # lack default values for block parameters.
    def has_related(method_name = "related_items")
      define_method(method_name) do |*args|
        HasRelated.similar_items(self, args.first)
      end
    end
  end

  class << self

    # Path of the Marshal file holding the dataset for +klass+ (a class or a
    # class name). Relies on String#underscore being available.
    def file_for_class(klass)
      File.join(Rails.root.to_s, "db", "similar_items_datasets", "#{klass.to_s.underscore}.bin")
    end

    # Loads the records related to +item+, best match first.
    #
    # item  - record responding to #id whose class responds to find_all_by_id.
    # count - optional maximum number of records; nil means no limit.
    #
    # Returns an Array of records (empty when nothing is known about +item+).
    def similar_items(item, count = nil)
      ids = similar_item_ids(item, count)
      return [] if ids.empty?

      # Remember each id's rank so the finder's arbitrary order can be undone.
      position = {}
      ids.each_with_index { |id, index| position[id] = index }

      records = item.class.find_all_by_id(ids) || []
      # Records whose id is not in the ranking sort last instead of raising.
      records.sort_by { |record| position.fetch(record.id, ids.size) }
    end

    # Returns the ids of the items related to +item+, best match first,
    # truncated to +count+ entries when +count+ is given.
    def similar_item_ids(item, count = nil)
      dataset = similar_items_dataset(item.class.name) || {}
      rankings = dataset[item.id] || []
      rankings = rankings.first(count) if count
      rankings.map { |_score, id| id }
    end

    # Similarity of two items: the sum over +people+ of the product of both
    # items' weights for that person, divided by the size of +all_people+.
    #
    # item1 and item2 are accepted for interface compatibility but not used.
    # Returns a Float; 0.0 when +all_people+ is empty (avoids a NaN result).
    def similarity(item1, prefs_item1, item2, prefs_item2, people, all_people)
      return 0.0 if all_people.empty?

      total = people.inject(0) do |acc, person|
        acc + prefs_item2[person] * prefs_item1[person]
      end
      total / all_people.size.to_f
    end

    # Builds the ranking table for a preference map.
    #
    # prefs - Hash of { item => { person => weight } }.
    #
    # For every item, each other item with a positive similarity is collected
    # as [score, other_item]. The top MAX_RELATED_ITEMS pairs (highest score
    # first) are stored under the item; items with no positive match are
    # omitted. When a block is given it is yielded [item, all_scored_pairs]
    # (unsorted, untruncated) once per item.
    #
    # Returns a Hash of { item => [[score, other_item], ...] }.
    def generate_dataset(prefs, &block)
      all_results = {}

      prefs.each do |item1, item1_prefs|
        item1_people = item1_prefs.keys
        scored = []

        prefs.each do |item2, item2_prefs|
          next if item1 == item2

          item2_people = item2_prefs.keys
          common_people = item1_people & item2_people
          # No shared people means the score cannot be positive; skip the work.
          next if common_people.empty?

          all_people = item1_people | item2_people
          score = similarity(item1, item1_prefs, item2, item2_prefs, common_people, all_people)
          scored << [score, item2] if score > 0
        end

        unless scored.empty?
          # Block parameters deliberately do not reuse item1: on Ruby 1.8 that
          # would overwrite the outer variable before it is yielded below.
          all_results[item1] = scored.sort_by { |score, _other| -score }.first(MAX_RELATED_ITEMS)
        end

        yield [item1, scored] if block_given?
      end

      all_results
    end

    # Generates the dataset for +prefs+ and writes it to the file for +klass+.
    # An optional block is forwarded to generate_dataset.
    def dump_dataset(prefs, klass, &block)
      ensure_data_dir_exists!(klass)
      dataset = generate_dataset(prefs, &block)
      write_dataset_to_disk(dataset, klass)
    end

    # Like dump_dataset, but +grouped_prefs+ holds several independent
    # preference maps; each is ranked separately and the results are merged
    # into one dataset (later groups win on duplicate item keys).
    def dump_grouped_datasets(grouped_prefs, klass, &block)
      ensure_data_dir_exists!(klass)
      dataset = {}
      grouped_prefs.each do |_group, prefs|
        dataset.merge!(generate_dataset(prefs, &block))
      end
      write_dataset_to_disk(dataset, klass)
    end

    # Returns the dataset for +klass_name+, loading it from disk on first use
    # and memoising it per class name. A missing or unreadable file yields an
    # empty (and likewise memoised) Hash.
    def similar_items_dataset(klass_name)
      @similar_items_dataset ||= {}
      key = klass_name.to_s
      @similar_items_dataset[key] ||= load_dataset_from_disk(key)
    end

    private

    # Creates the directory that will hold the dataset file for +klass+.
    # mkdir_p is a no-op when the directory already exists.
    def ensure_data_dir_exists!(klass)
      FileUtils.mkdir_p(File.dirname(file_for_class(klass)))
    end

    # Reads the Marshal file for +klass+, or returns {} when it is not readable.
    # NOTE(review): Marshal.load must only ever see files this module wrote;
    # never point it at untrusted data.
    def load_dataset_from_disk(klass)
      path = file_for_class(klass)
      return {} unless File.readable?(path)

      # Block form closes the handle; "rb" keeps the binary payload intact.
      File.open(path, "rb") { |io| Marshal.load(io) }
    end

    # Serialises +dataset+ to the file for +klass+ and drops any memoised copy
    # so later lookups in this process see the new data.
    def write_dataset_to_disk(dataset, klass)
      result = File.open(file_for_class(klass), "wb") do |io|
        Marshal.dump(dataset, io)
      end
      @similar_items_dataset.delete(klass.to_s) if @similar_items_dataset
      result
    end
  end
end
|
data/rails/init.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Checks the rankings produced by HasRelated.generate_dataset for a small,
# hand-computable preference map (every weight is 1, so the similarity of two
# products is shared users / users of either product).
class DatasetGenerationTest < Test::Unit::TestCase
  def setup
    prefs = {
      :p1 => {:u1 => 1, :u2 => 1, :u3 => 1},
      :p2 => {:u2 => 1},
      :p3 => {:u3 => 1, :u5 => 1},
      :p4 => {:u1 => 1, :u4 => 1},
      :p5 => {:u1 => 1, :u4 => 1, :u5 => 1}
    }
    @rv = HasRelated.generate_dataset(prefs)
  end

  # p1 shares one user with each of p2, p3 and p4. p2 scores 1/3 and leads;
  # p3 and p4 tie at 1/4, so only their membership (not order) is asserted.
  def test_tied_game
    ids = related_ids(:p1)
    assert_equal :p2, ids.first
    assert_equal ["p3", "p4"], ids[1, 2].map { |id| id.to_s }.sort
  end

  # p2's only user (u2) is shared with p1 alone.
  def test_one_of_many
    assert_equal :p1, related_ids(:p2).first
  end

  # p5 shares two users with p4 (2/3) and one with p3 (1/4).
  def test_clear_winner
    ids = related_ids(:p5)
    assert_equal :p4, ids[0]
    assert_equal :p3, ids[1]
  end

  # p3 and p4 have no users in common, so p4 must not be listed for p3.
  def test_no_relation
    assert !related_ids(:p3).include?(:p4)
  end

  private

  # Ids related to +item+, best match first, with the scores stripped.
  def related_ids(item)
    @rv[item].map { |_score, id| id }
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
2
|
+
require 'rubygems'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
# Reopens Test::Unit::TestCase; no methods are added in this file.
class Test::Unit::TestCase #:nodoc:
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'tmpdir'

# Minimal stand-in for the Rails constant so code that calls Rails.root can run
# under test without loading Rails.
module Rails
  # Returns the path of a scratch directory named "has_related", creating it on
  # first call and memoising the path afterwards.
  #
  # The directory lives under $TMPDIR when that variable is set, otherwise
  # under Dir.tmpdir (previously an unset TMPDIR raised a TypeError).
  def root
    if @tmpdir.nil?
      @tmpdir = File.join(ENV["TMPDIR"] || Dir.tmpdir, "has_related")
      Dir.mkdir(@tmpdir) unless File.directory?(@tmpdir)
      # Remove the scratch directory once the path string is finalized.
      ObjectSpace.define_finalizer(@tmpdir, proc { |_id| FileUtils.remove_entry_secure(@tmpdir) })
    end
    @tmpdir
  end

  # Expose #root as Rails.root.
  extend self
end
|
21
|
+
|
22
|
+
require "#{File.dirname(__FILE__)}/../init"
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cwninja-has_related
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tom Lea
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-18 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: contrib@tomlea.co.uk
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.markdown
|
24
|
+
files:
|
25
|
+
- README.markdown
|
26
|
+
- test/dataset_generation_test.rb
|
27
|
+
- test/test_helper.rb
|
28
|
+
- lib/has_related.rb
|
29
|
+
- rails/init.rb
|
30
|
+
has_rdoc: true
|
31
|
+
homepage: http://tomlea.co.uk
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options:
|
34
|
+
- --main
|
35
|
+
- README.markdown
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: "0"
|
43
|
+
version:
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
requirements: []
|
51
|
+
|
52
|
+
rubyforge_project: has_related
|
53
|
+
rubygems_version: 1.2.0
|
54
|
+
signing_key:
|
55
|
+
specification_version: 2
|
56
|
+
summary: Finds similar items based on user demand.
|
57
|
+
test_files: []
|
58
|
+
|