loose_tight_dictionary 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,74 @@
|
|
1
|
+
class LooseTightDictionary
|
2
|
+
class CachedResult < ::ActiveRecord::Base
|
3
|
+
set_table_name :loose_tight_dictionary_cached_results
|
4
|
+
|
5
|
+
def self.create_table
|
6
|
+
connection.create_table :loose_tight_dictionary_cached_results do |t|
|
7
|
+
t.string :a_class
|
8
|
+
t.string :a
|
9
|
+
t.string :b_class
|
10
|
+
t.string :b
|
11
|
+
end
|
12
|
+
connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a], :name => 'aba'
|
13
|
+
connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :b], :name => 'abb'
|
14
|
+
connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a, :b], :name => 'abab'
|
15
|
+
reset_column_information
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.setup(from_scratch = false)
|
19
|
+
connection.drop_table :loose_tight_dictionary_cached_results if from_scratch and table_exists?
|
20
|
+
create_table unless table_exists?
|
21
|
+
end
|
22
|
+
|
23
|
+
module ActiveRecordBaseExtension
|
24
|
+
# required options:
|
25
|
+
# :primary_key - what to call on this class
|
26
|
+
# :foreign_key - what to call on the other class
|
27
|
+
def cache_loose_tight_dictionary_matches_with(other_active_record_class, options)
|
28
|
+
other = other_active_record_class.to_s.singularize.camelcase
|
29
|
+
me = name
|
30
|
+
if me < other
|
31
|
+
a = me
|
32
|
+
b = other
|
33
|
+
primary_key = :a
|
34
|
+
foreign_key = :b
|
35
|
+
else
|
36
|
+
a = other
|
37
|
+
b = me
|
38
|
+
primary_key = :b
|
39
|
+
foreign_key = :a
|
40
|
+
end
|
41
|
+
|
42
|
+
# def aircraft
|
43
|
+
define_method other.underscore.pluralize do
|
44
|
+
other.constantize.where options[:foreign_key] => send("#{other.underscore.pluralize}_foreign_keys")
|
45
|
+
end
|
46
|
+
|
47
|
+
# def flight_segments_foreign_keys
|
48
|
+
define_method "#{other.underscore.pluralize}_foreign_keys" do
|
49
|
+
fz = ::LooseTightDictionary::CachedResult.arel_table
|
50
|
+
sql = fz.project(fz[foreign_key]).where(fz["#{primary_key}_class".to_sym].eq(self.class.name).and(fz["#{foreign_key}_class".to_sym].eq(other)).and(fz[primary_key].eq(send(options[:primary_key])))).to_sql
|
51
|
+
connection.select_values sql
|
52
|
+
end
|
53
|
+
|
54
|
+
# def cache_aircraft!
|
55
|
+
define_method "cache_#{other.underscore.pluralize}!" do
|
56
|
+
other_class = other.constantize
|
57
|
+
primary_key_value = send options[:primary_key]
|
58
|
+
other_class.loose_tight_dictionary.find_all(primary_key_value).each do |other_instance|
|
59
|
+
attrs = {}
|
60
|
+
attrs[primary_key] = primary_key_value
|
61
|
+
attrs["#{primary_key}_class"] = self.class.name
|
62
|
+
attrs[foreign_key] = other_instance.send options[:foreign_key]
|
63
|
+
attrs["#{foreign_key}_class"] = other
|
64
|
+
unless ::LooseTightDictionary::CachedResult.exists? attrs
|
65
|
+
::LooseTightDictionary::CachedResult.create! attrs
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
::ActiveRecord::Base.extend ::LooseTightDictionary::CachedResult::ActiveRecordBaseExtension
|
@@ -18,6 +18,7 @@ class LooseTightDictionary
|
|
18
18
|
autoload :Wrapper, 'loose_tight_dictionary/wrapper'
|
19
19
|
autoload :Similarity, 'loose_tight_dictionary/similarity'
|
20
20
|
autoload :Score, 'loose_tight_dictionary/score'
|
21
|
+
autoload :CachedResult, 'loose_tight_dictionary/cached_result'
|
21
22
|
|
22
23
|
class Freed < RuntimeError; end
|
23
24
|
|
@@ -21,7 +21,11 @@ Gem::Specification.new do |s|
|
|
21
21
|
|
22
22
|
s.add_development_dependency "shoulda"
|
23
23
|
s.add_development_dependency "remote_table"
|
24
|
-
s.
|
24
|
+
s.add_development_dependency 'activerecord', '>=3'
|
25
|
+
s.add_development_dependency 'mysql'
|
26
|
+
s.add_development_dependency 'cohort_scope'
|
27
|
+
s.add_development_dependency 'weighted_average'
|
28
|
+
s.add_dependency 'activesupport', '>=3'
|
25
29
|
s.add_dependency 'amatch'
|
26
30
|
s.add_dependency 'to_regexp', '>=0.0.3'
|
27
31
|
end
|
data/test/test_cache.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
require 'active_support/all'
|
4
|
+
require 'active_record'
|
5
|
+
require 'cohort_scope'
|
6
|
+
require 'weighted_average'
|
7
|
+
|
8
|
+
ActiveRecord::Base.establish_connection(
|
9
|
+
'adapter' => 'mysql',
|
10
|
+
'database' => 'loose_tight_dictionary_test',
|
11
|
+
'username' => 'root',
|
12
|
+
'password' => 'password'
|
13
|
+
)
|
14
|
+
|
15
|
+
# ActiveRecord::Base.logger = Logger.new $stderr
|
16
|
+
|
17
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
18
|
+
inflect.uncountable 'aircraft'
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'loose_tight_dictionary/cached_result'
|
22
|
+
|
23
|
+
::LooseTightDictionary::CachedResult.setup(true)
|
24
|
+
::LooseTightDictionary::CachedResult.delete_all
|
25
|
+
|
26
|
+
class Aircraft < ActiveRecord::Base
|
27
|
+
set_primary_key :icao_code
|
28
|
+
|
29
|
+
cache_loose_tight_dictionary_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
|
30
|
+
|
31
|
+
def aircraft_description
|
32
|
+
[manufacturer_name, model_name].compact.join(' ')
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.loose_tight_dictionary
|
36
|
+
@loose_tight_dictionary ||= LooseTightDictionary.new all, :haystack_reader => lambda { |straw| straw.aircraft_description }
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.create_table
|
40
|
+
connection.drop_table(:aircraft) rescue nil
|
41
|
+
connection.execute %{
|
42
|
+
CREATE TABLE `aircraft` (
|
43
|
+
`icao_code` varchar(255) DEFAULT NULL,
|
44
|
+
`manufacturer_name` varchar(255) DEFAULT NULL,
|
45
|
+
`model_name` varchar(255) DEFAULT NULL,
|
46
|
+
PRIMARY KEY (`icao_code`)
|
47
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
48
|
+
}
|
49
|
+
reset_column_information
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
class FlightSegment < ActiveRecord::Base
|
54
|
+
set_primary_key :row_hash
|
55
|
+
|
56
|
+
cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
|
57
|
+
|
58
|
+
extend CohortScope
|
59
|
+
self.minimum_cohort_size = 1
|
60
|
+
|
61
|
+
def self.create_table
|
62
|
+
connection.drop_table(:flight_segments) rescue nil
|
63
|
+
connection.execute %{
|
64
|
+
CREATE TABLE `flight_segments` (
|
65
|
+
`row_hash` varchar(255) NOT NULL DEFAULT '',
|
66
|
+
`aircraft_description` varchar(255) DEFAULT NULL,
|
67
|
+
`passengers` int(11) DEFAULT NULL,
|
68
|
+
`seats` int(11) DEFAULT NULL,
|
69
|
+
PRIMARY KEY (`row_hash`)
|
70
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
71
|
+
}
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
FlightSegment.create_table
|
76
|
+
Aircraft.create_table
|
77
|
+
|
78
|
+
a = Aircraft.new
|
79
|
+
a.icao_code = 'B742'
|
80
|
+
a.manufacturer_name = 'Boeing'
|
81
|
+
a.model_name = '747-200'
|
82
|
+
a.save!
|
83
|
+
|
84
|
+
fs = FlightSegment.new
|
85
|
+
fs.row_hash = 'madison to chicago'
|
86
|
+
fs.aircraft_description = 'BORING 747200'
|
87
|
+
fs.passengers = 10
|
88
|
+
fs.seats = 10
|
89
|
+
fs.save!
|
90
|
+
|
91
|
+
fs = FlightSegment.new
|
92
|
+
fs.row_hash = 'madison to minneapolis'
|
93
|
+
fs.aircraft_description = 'bing 747'
|
94
|
+
fs.passengers = 100
|
95
|
+
fs.seats = 5
|
96
|
+
fs.save!
|
97
|
+
|
98
|
+
FlightSegment.find_each do |fs|
|
99
|
+
fs.cache_aircraft!
|
100
|
+
end
|
101
|
+
|
102
|
+
class TestCache < Test::Unit::TestCase
|
103
|
+
def test_002_one_degree_of_separation
|
104
|
+
aircraft = Aircraft.find('B742')
|
105
|
+
assert_equal 2, aircraft.flight_segments.count
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_003_standard_sql_calculations
|
109
|
+
aircraft = Aircraft.find('B742')
|
110
|
+
assert_equal 110, aircraft.flight_segments.sum(:passengers)
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_004_weighted_average
|
114
|
+
aircraft = Aircraft.find('B742')
|
115
|
+
assert_equal 5.4545, aircraft.flight_segments.weighted_average(:seats, :weighted_by => :passengers)
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_005_right_way_to_do_cohorts
|
119
|
+
aircraft = Aircraft.find('B742')
|
120
|
+
assert_equal 2, FlightSegment.big_cohort(:aircraft_description => aircraft.flight_segments_foreign_keys).count
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_006_you_can_get_aircraft_from_flight_segments
|
124
|
+
fs = FlightSegment.first
|
125
|
+
# you need to add an aircraft_description column
|
126
|
+
assert_raises(ActiveRecord::StatementInvalid) do
|
127
|
+
assert_equal 2, fs.aircraft.count
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: loose_tight_dictionary
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.2.2
|
5
|
+
version: 0.2.3
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Seamus Abshere
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-05-
|
13
|
+
date: 2011-05-17 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -36,18 +36,18 @@ dependencies:
|
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
name:
|
39
|
+
name: activerecord
|
40
40
|
prerelease: false
|
41
41
|
requirement: &id003 !ruby/object:Gem::Requirement
|
42
42
|
none: false
|
43
43
|
requirements:
|
44
44
|
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version:
|
47
|
-
type: :runtime
|
46
|
+
version: "3"
|
47
|
+
type: :development
|
48
48
|
version_requirements: *id003
|
49
49
|
- !ruby/object:Gem::Dependency
|
50
|
-
name: amatch
|
50
|
+
name: mysql
|
51
51
|
prerelease: false
|
52
52
|
requirement: &id004 !ruby/object:Gem::Requirement
|
53
53
|
none: false
|
@@ -55,19 +55,63 @@ dependencies:
|
|
55
55
|
- - ">="
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: "0"
|
58
|
-
type: :runtime
|
58
|
+
type: :development
|
59
59
|
version_requirements: *id004
|
60
60
|
- !ruby/object:Gem::Dependency
|
61
|
-
name: to_regexp
|
61
|
+
name: cohort_scope
|
62
62
|
prerelease: false
|
63
63
|
requirement: &id005 !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
type: :development
|
70
|
+
version_requirements: *id005
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: weighted_average
|
73
|
+
prerelease: false
|
74
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
80
|
+
type: :development
|
81
|
+
version_requirements: *id006
|
82
|
+
- !ruby/object:Gem::Dependency
|
83
|
+
name: activesupport
|
84
|
+
prerelease: false
|
85
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: "3"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id007
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: amatch
|
95
|
+
prerelease: false
|
96
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: "0"
|
102
|
+
type: :runtime
|
103
|
+
version_requirements: *id008
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: to_regexp
|
106
|
+
prerelease: false
|
107
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
64
108
|
none: false
|
65
109
|
requirements:
|
66
110
|
- - ">="
|
67
111
|
- !ruby/object:Gem::Version
|
68
112
|
version: 0.0.3
|
69
113
|
type: :runtime
|
70
|
-
version_requirements: *id005
|
114
|
+
version_requirements: *id009
|
71
115
|
description: Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.
|
72
116
|
email:
|
73
117
|
- seamus@abshere.net
|
@@ -104,6 +148,7 @@ files:
|
|
104
148
|
- examples/icao-bts.xls
|
105
149
|
- lib/loose_tight_dictionary.rb
|
106
150
|
- lib/loose_tight_dictionary/blocking.rb
|
151
|
+
- lib/loose_tight_dictionary/cached_result.rb
|
107
152
|
- lib/loose_tight_dictionary/identity.rb
|
108
153
|
- lib/loose_tight_dictionary/result.rb
|
109
154
|
- lib/loose_tight_dictionary/score.rb
|
@@ -114,6 +159,7 @@ files:
|
|
114
159
|
- loose_tight_dictionary.gemspec
|
115
160
|
- test/helper.rb
|
116
161
|
- test/test_blocking.rb
|
162
|
+
- test/test_cache.rb
|
117
163
|
- test/test_identity.rb
|
118
164
|
- test/test_loose_tight_dictionary.rb
|
119
165
|
- test/test_loose_tight_dictionary_convoluted.rb.disabled
|
@@ -149,6 +195,7 @@ summary: Allows iterative development of dictionaries for big data sets.
|
|
149
195
|
test_files:
|
150
196
|
- test/helper.rb
|
151
197
|
- test/test_blocking.rb
|
198
|
+
- test/test_cache.rb
|
152
199
|
- test/test_identity.rb
|
153
200
|
- test/test_loose_tight_dictionary.rb
|
154
201
|
- test/test_loose_tight_dictionary_convoluted.rb.disabled
|