rugroupy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Ryan Wynn
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,42 @@
1
+ = rugroupy
2
+
3
+ rugroupy is a ruby library which interacts with mongodb allowing one to tag entities and perform queries
4
+ to determine similarity between entities. rugroupy finds similarities by performing a series of map-reduce
5
+ operations using mongo. By using mongo's auto sharding capabilities rugroupy's grouping operation can
6
+ be distributed between multiple servers.
7
+
8
+ == Features
9
+ * find entities most similar to a given entity
10
+ * find the most similar entities over the entire entity set
11
+ * use javascript to include only certain tags in the grouping operation
12
+ * use javascript to assign custom scoring to certain tags
13
+ * use javascript to create dynamic tags
14
+
15
+ == Examples
16
+
17
+ See http://github.com/rwynn/rugroupy/tree/master/test
18
+
19
+ == Requirements
20
+
21
+ * Mongo http://www.mongodb.org/downloads
22
+ * Mongo Ruby Driver http://www.mongodb.org/display/DOCS/Ruby+Language+Center#RubyLanguageCenter-RubyDriver
23
+
24
+ == Install
25
+
26
+ * sudo gem install rugroupy
27
+
28
+ == Contributing to rugroupy
29
+
30
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
31
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
32
+ * Fork the project
33
+ * Start a feature/bugfix branch
34
+ * Commit and push until you are happy with your contribution
35
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
36
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
37
+
38
+ == Copyright
39
+
40
+ Copyright (c) 2011 Ryan Wynn. See LICENSE.txt for
41
+ further details.
42
+
@@ -0,0 +1,86 @@
1
+ require 'mongo'
2
+ require 'bson'
3
+
4
+ module Groupy
5
+
6
class Entity
  # Binds this object to the document whose "_id" is +entity_id+ in the
  # collection called +name+.
  #
  # database::  database handle; it is asked for +collection_names+ and
  #             indexed by collection name
  # name::      name of the collection that stores the entities
  # entity_id:: value used as the document "_id"
  # create::    when true (the default) a document with an empty "tags"
  #             hash is inserted for the entity
  #
  # The collection is created when its name is not already listed by the
  # database, and an index is ensured on the "#{name}_count" collection.
  def initialize(database, name, entity_id, create=true)
    @database, @name, @entity_id = database, name, entity_id
    @database.create_collection(@name) unless @database.collection_names.member?(@name)

    @database["#{@name}_count"].ensure_index([['_id.e', Mongo::DESCENDING],
                                              ['_id.tag', Mongo::DESCENDING],
                                              ['value.count', Mongo::DESCENDING]],
                                             :background => false)

    return unless create

    begin
      @database[@name].insert({"_id" => @entity_id, "tags" => {}}, :safe => true)
    rescue Mongo::MongoDBError
      # Deliberately best effort: a failed insert is ignored so that
      # constructing an Entity never raises here (presumably the usual cause
      # is that the document already exists -- confirm against the driver).
    end
  end

  # Returns the stored document for this entity, as given by +find_one+.
  def get
    @database[@name].find_one({ "_id" => @entity_id })
  end

  # Removes this entity's document from the collection. Returns nil.
  def delete
    @database[@name].remove({"_id" => @entity_id}, :safe => true)
    nil
  end

  # Replaces the entity's "tags" with an empty hash. Returns nil.
  def clear_tags
    selector = {"_id" => @entity_id}
    change = {"$set" => {"tags" => {}}}
    @database[@name].update(selector, change, :safe => true)
    nil
  end

  # Returns true when the stored document lists +value+ under +tag+.
  #
  # Returns false -- instead of failing on nil -- when the entity has no
  # stored document or the document has no "tags" entry.
  def has_tag(tag, value)
    stored = get
    return false if stored.nil?

    tags = stored['tags']
    return false unless tags && tags.member?(tag)

    tags[tag].member?(value)
  end

  # Adds +value+ (a single value or an Array of values) to +tag+.
  # Returns nil.
  def tag(tag, value)
    apply_tag(tag, value)
    nil
  end

  # Removes +value+ (a single value or an Array of values) from +tag+.
  # Returns nil.
  def untag(tag, value)
    apply_tag(tag, value, false)
    nil
  end

  # Issues the update that adds (+add+ true) or removes (+add+ false)
  # +value+ on the "tags.<tag>" field of this entity. Returns nil.
  #
  # An Array is applied element-wise ("$addToSet" with "$each", or
  # "$pullAll"); any other value is applied as one scalar ("$addToSet" or
  # "$pull"). Only Arrays take the element-wise path because "$each" and
  # "$pullAll" need a list, so scalars such as Integers must not be sent
  # through it.
  def apply_tag(tag, value, add=true)
    field = "tags.#{tag}"
    change =
      if value.is_a?(Array)
        if add
          {"$addToSet" => {field => {"$each" => value}}}
        else
          {"$pullAll" => {field => value}}
        end
      else
        {(add ? "$addToSet" : "$pull") => {field => value}}
      end

    @database[@name].update({"_id" => @entity_id}, change, :safe => true)
    nil
  end

  # Lists the entities paired with this one in the "#{name}_count"
  # collection.
  #
  # tag::     restrict to rows for this tag; when nil only rows without an
  #           "_id.tag" field are used
  # skip::    number of rows to skip (optional)
  # limit::   maximum number of rows (optional)
  # reverse:: sort by "value.count" ascending instead of descending
  #
  # Returns an Array holding, for each row, the member of the row's
  # "_id.e" pair that is not this entity.
  def similiar(tag=nil, skip=nil, limit=nil, reverse=false)
    query = BSON::OrderedHash.new
    query["_id.e"] = @entity_id
    query["_id.tag"] = tag || {"$exists" => false}

    cursor = @database["#{@name}_count"].find(query, :fields => {"_id.e" => 1})
    cursor.skip(skip) if skip
    cursor.limit(limit) if limit
    cursor.sort("value.count", reverse ? Mongo::ASCENDING : Mongo::DESCENDING)

    cursor.collect do |row|
      first, second = row["_id"]["e"]
      first == @entity_id ? second : first
    end
  end

  # Correctly spelled name for #similiar; the original name is kept so
  # existing callers continue to work.
  alias_method :similar, :similiar

end
85
+ end
86
+
@@ -0,0 +1,101 @@
1
+ require 'mongo'
2
+ require 'bson'
3
+
4
+ module Groupy
5
+
6
class EntityGrouper

  # JavaScript sources used by #group when the matching option is absent:
  # every tag scores 1, every tag is included, and no dynamic tags are
  # produced.
  @@defaultScoreFunction = "function(tag) { return 1; }"
  @@defaultIncludeFunction = "function(tag) { return true; }"
  @@dynamicTagFunction = "function(doc) {}"

  # database:: database handle, indexed by collection name
  # entity::   name of the collection holding the tagged entities
  #
  # Ensures an index on the "#{entity}_count" collection.
  def initialize(database, entity)
    @database, @entity = database, entity
    @database["#{@entity}_count"].ensure_index([['_id.tag', Mongo::DESCENDING],
                                                ['value.count', Mongo::DESCENDING]],
                                               :background => false)
  end

  # Lists entity pairs from the "#{entity}_count" collection.
  #
  # tag::     restrict to rows for this tag; when nil only rows without an
  #           "_id.tag" field are used
  # skip::    number of rows to skip (optional)
  # limit::   maximum number of rows (optional)
  # reverse:: sort by "value.count" ascending instead of descending
  #
  # Returns an Array with the "_id.e" value of each row.
  def similiar(tag=nil, skip=nil, limit=nil, reverse=false)
    query = BSON::OrderedHash.new
    query["_id.tag"] = tag || {"$exists" => false}

    cursor = @database["#{@entity}_count"].find(query, :fields => {"_id.e" => 1})
    cursor.skip(skip) if skip
    cursor.limit(limit) if limit
    cursor.sort("value.count", reverse ? Mongo::ASCENDING : Mongo::DESCENDING)
    cursor.collect { |row| row["_id"]["e"] }
  end

  # Correctly spelled name for #similiar; the original name is kept so
  # existing callers continue to work.
  alias_method :similar, :similiar

  # Runs both map-reduce passes: #invert_entities, then #count_entities.
  #
  # options may carry JavaScript function sources under :includeFunction,
  # :dynamicTagFunction and :scoreFunction; missing ones fall back to the
  # class defaults. Returns nil.
  def group(options={})
    invert_entities(options[:includeFunction] || @@defaultIncludeFunction,
                    options[:dynamicTagFunction] || @@dynamicTagFunction)
    count_entities(options[:scoreFunction] || @@defaultScoreFunction)
  end

  # Map-reduces "#{entity}_invert" into "#{entity}_count".
  #
  # For every inverted row, each pair of its (sorted) entities is emitted
  # twice -- once keyed with the tag and once without -- carrying the score
  # that +scoreFunction+ (JavaScript source) returns for the tag. The reduce
  # step sums the counts per key. Returns nil.
  #
  # The pair loops use numeric indexes on purpose: a for-in loop yields
  # string keys, whose "<" comparison is lexicographic ("10" < "2"), which
  # would emit some pairs in reversed order once a row holds more than ten
  # entities and so split one pair's count across two keys.
  def count_entities(scoreFunction)
    map = BSON::Code.new(<<-EOS)
      function() {
        var score = #{scoreFunction};
        var tag = this._id.tag;
        var tagScore = score(tag);
        var entities = this.value.entities.slice(0).sort();
        for (var x = 0; x < entities.length; x++) {
          for (var y = x + 1; y < entities.length; y++) {
            emit({tag:tag, e:[entities[x], entities[y]]}, {count:tagScore});
            emit({e:[entities[x], entities[y]]}, {count:tagScore});
          }
        }
      }
    EOS

    reduce = BSON::Code.new(<<-EOS)
      function(key, values) {
        result = {count:0};
        values.forEach(function(value) {
          result.count += value.count;
        });
        return result;
      }
    EOS

    @database["#{@entity}_invert"].map_reduce(map, reduce, :out => "#{@entity}_count")
    nil
  end

  # Map-reduces the entity collection into "#{entity}_invert".
  #
  # For each document with tags, every value of every tag accepted by
  # +includeFunction+ is emitted under the key {tag, value} with the
  # document's _id; +dynamicTagFunction+ is then called with the document.
  # The reduce step concatenates the entity ids per key. Both arguments are
  # JavaScript function sources. Returns nil.
  def invert_entities(includeFunction, dynamicTagFunction)
    map = BSON::Code.new(<<-EOS)
      function() {
        include = #{includeFunction};
        dynamicTagFunction = #{dynamicTagFunction};
        entity_id = this._id;
        if (this.tags) {
          for (tag in this.tags) {
            if (!include(tag)) continue;
            this.tags[tag].forEach(function(z) {
              emit({tag:tag, value:z}, {entities: [entity_id]});
            });
          }
          dynamicTagFunction(this);
        }
      }
    EOS

    reduce = BSON::Code.new(<<-EOS)
      function(key, values) {
        result = {entities:[]};
        values.forEach(function(value) {
          value['entities'].forEach(function(entity_id) {
            result['entities'].push( entity_id );
          });
        });
        return result;
      }
    EOS

    @database[@entity].map_reduce(map, reduce, :out => "#{@entity}_invert")
    nil
  end

end
101
+ end
data/lib/rugroupy.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rugroupy/entity'
2
+ require 'rugroupy/group'
metadata ADDED
@@ -0,0 +1,193 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rugroupy
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ryan Wynn
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-08-17 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ requirement: &id001 !ruby/object:Gem::Requirement
22
+ none: false
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ hash: 3
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ version_requirements: *id001
31
+ name: shoulda
32
+ prerelease: false
33
+ type: :development
34
+ - !ruby/object:Gem::Dependency
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ hash: 23
41
+ segments:
42
+ - 1
43
+ - 0
44
+ - 0
45
+ version: 1.0.0
46
+ version_requirements: *id002
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ - !ruby/object:Gem::Dependency
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ hash: 7
57
+ segments:
58
+ - 1
59
+ - 6
60
+ - 4
61
+ version: 1.6.4
62
+ version_requirements: *id003
63
+ name: jeweler
64
+ prerelease: false
65
+ type: :development
66
+ - !ruby/object:Gem::Dependency
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ version_requirements: *id004
77
+ name: rcov
78
+ prerelease: false
79
+ type: :development
80
+ - !ruby/object:Gem::Dependency
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ hash: 25
87
+ segments:
88
+ - 1
89
+ - 3
90
+ - 1
91
+ version: 1.3.1
92
+ version_requirements: *id005
93
+ name: mongo
94
+ prerelease: false
95
+ type: :development
96
+ - !ruby/object:Gem::Dependency
97
+ requirement: &id006 !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 25
103
+ segments:
104
+ - 1
105
+ - 3
106
+ - 1
107
+ version: 1.3.1
108
+ version_requirements: *id006
109
+ name: bson
110
+ prerelease: false
111
+ type: :development
112
+ - !ruby/object:Gem::Dependency
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 25
119
+ segments:
120
+ - 1
121
+ - 3
122
+ - 1
123
+ version: 1.3.1
124
+ version_requirements: *id007
125
+ name: bson_ext
126
+ prerelease: false
127
+ type: :development
128
+ - !ruby/object:Gem::Dependency
129
+ requirement: &id008 !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ hash: 25
135
+ segments:
136
+ - 1
137
+ - 2
138
+ - 3
139
+ version: 1.2.3
140
+ version_requirements: *id008
141
+ name: SystemTimer
142
+ prerelease: false
143
+ type: :development
144
+ description: a library which uses the map-reduce capabilities of mongodb to group entities based on tags.
145
+ email: ryan.m.wynn@gmail.com
146
+ executables: []
147
+
148
+ extensions: []
149
+
150
+ extra_rdoc_files:
151
+ - LICENSE.txt
152
+ - README.rdoc
153
+ files:
154
+ - lib/rugroupy.rb
155
+ - lib/rugroupy/entity.rb
156
+ - lib/rugroupy/group.rb
157
+ - LICENSE.txt
158
+ - README.rdoc
159
+ homepage: http://github.com/rwynn/rugroupy
160
+ licenses:
161
+ - MIT
162
+ post_install_message:
163
+ rdoc_options: []
164
+
165
+ require_paths:
166
+ - lib
167
+ required_ruby_version: !ruby/object:Gem::Requirement
168
+ none: false
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ hash: 3
173
+ segments:
174
+ - 0
175
+ version: "0"
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ hash: 3
182
+ segments:
183
+ - 0
184
+ version: "0"
185
+ requirements: []
186
+
187
+ rubyforge_project:
188
+ rubygems_version: 1.8.6
189
+ signing_key:
190
+ specification_version: 3
191
+ summary: find things that are similiar to things
192
+ test_files: []
193
+