loose_tight_dictionary 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
class LooseTightDictionary
  # ActiveRecord model that stores loose/tight dictionary matches between two
  # ActiveRecord classes so that they can later be looked up with plain SQL.
  #
  # Every row holds one matched pair as (a_class, a, b_class, b). The macro in
  # ActiveRecordBaseExtension decides which side of a pair goes into the "a"
  # columns by comparing the two class names as strings.
  class CachedResult < ::ActiveRecord::Base
    # `table_name=` is used instead of `set_table_name` because the latter was
    # removed in ActiveRecord 4 while the gemspec allows any version >= 3.
    self.table_name = 'loose_tight_dictionary_cached_results'

    # Creates the backing table plus the three lookup indexes, then refreshes
    # the column information cached on this class.
    def self.create_table
      connection.create_table table_name do |t|
        t.string :a_class
        t.string :a
        t.string :b_class
        t.string :b
      end
      connection.add_index table_name, [:a_class, :b_class, :a], :name => 'aba'
      connection.add_index table_name, [:a_class, :b_class, :b], :name => 'abb'
      connection.add_index table_name, [:a_class, :b_class, :a, :b], :name => 'abab'
      reset_column_information
    end

    # Makes sure the backing table exists.
    #
    # from_scratch - when true, an existing table is dropped first, which
    #                discards every cached result.
    def self.setup(from_scratch = false)
      connection.drop_table table_name if from_scratch && table_exists?
      create_table unless table_exists?
    end

    # Class-level macro that is mixed into ActiveRecord::Base at the bottom of
    # this file.
    module ActiveRecordBaseExtension
      # Defines three instance methods on the calling class. For a call such as
      # `cache_loose_tight_dictionary_matches_with :flight_segments, ...` they are:
      #
      #   flight_segments              - relation on the other class restricted
      #                                  to the cached matches
      #   flight_segments_foreign_keys - the cached matching values themselves
      #   cache_flight_segments!       - runs the other class's
      #                                  loose_tight_dictionary and stores every
      #                                  match that is not stored yet
      #
      # other_active_record_class - name of the other class (singular or plural,
      #                             symbol or string); it is singularized and
      #                             camelcased before use
      # options                   - Hash, both keys are required:
      #                             :primary_key - what to call on this class
      #                             :foreign_key - what to call on the other class
      #
      # Raises ArgumentError when a required option is missing, so that the
      # mistake shows up at declaration time rather than on first use.
      def cache_loose_tight_dictionary_matches_with(other_active_record_class, options)
        missing = [:primary_key, :foreign_key].reject { |key| options[key] }
        unless missing.empty?
          raise ArgumentError, "cache_loose_tight_dictionary_matches_with requires #{missing.map { |key| key.inspect }.join(' and ')}"
        end

        other = other_active_record_class.to_s.singularize.camelcase
        other_plural = other.underscore.pluralize
        me = name

        # The class whose name compares lower is stored in the "a" columns, so
        # both sides of a relationship agree on where each value lives.
        if me < other
          my_column, other_column = :a, :b
        else
          my_column, other_column = :b, :a
        end

        my_reader = options[:primary_key]
        other_reader = options[:foreign_key]

        # def aircraft
        define_method other_plural do
          other.constantize.where other_reader => send("#{other_plural}_foreign_keys")
        end

        # def flight_segments_foreign_keys
        define_method "#{other_plural}_foreign_keys" do
          cached = ::LooseTightDictionary::CachedResult.arel_table
          matches_me = cached["#{my_column}_class".to_sym].eq(self.class.name).
            and(cached["#{other_column}_class".to_sym].eq(other)).
            and(cached[my_column].eq(send(my_reader)))
          sql = cached.project(cached[other_column]).where(matches_me).to_sql
          self.class.connection.select_values sql
        end

        # def cache_aircraft!
        define_method "cache_#{other_plural}!" do
          my_value = send my_reader
          other.constantize.loose_tight_dictionary.find_all(my_value).each do |other_instance|
            attrs = {
              my_column => my_value,
              "#{my_column}_class" => self.class.name,
              other_column => other_instance.send(other_reader),
              "#{other_column}_class" => other
            }
            # NOTE(review): check-then-insert is not atomic and the table has
            # no unique index, so concurrent writers could store duplicates.
            next if ::LooseTightDictionary::CachedResult.exists? attrs
            ::LooseTightDictionary::CachedResult.create! attrs
          end
        end
      end
    end
  end
end
|
|
73
|
+
|
|
74
|
+
::ActiveRecord::Base.extend ::LooseTightDictionary::CachedResult::ActiveRecordBaseExtension
|
|
@@ -18,6 +18,7 @@ class LooseTightDictionary
|
|
|
18
18
|
autoload :Wrapper, 'loose_tight_dictionary/wrapper'
|
|
19
19
|
autoload :Similarity, 'loose_tight_dictionary/similarity'
|
|
20
20
|
autoload :Score, 'loose_tight_dictionary/score'
|
|
21
|
+
autoload :CachedResult, 'loose_tight_dictionary/cached_result'
|
|
21
22
|
|
|
22
23
|
class Freed < RuntimeError; end
|
|
23
24
|
|
|
@@ -21,7 +21,11 @@ Gem::Specification.new do |s|
|
|
|
21
21
|
|
|
22
22
|
s.add_development_dependency "shoulda"
|
|
23
23
|
s.add_development_dependency "remote_table"
|
|
24
|
-
s.
|
|
24
|
+
s.add_development_dependency 'activerecord', '>=3'
|
|
25
|
+
s.add_development_dependency 'mysql'
|
|
26
|
+
s.add_development_dependency 'cohort_scope'
|
|
27
|
+
s.add_development_dependency 'weighted_average'
|
|
28
|
+
s.add_dependency 'activesupport', '>=3'
|
|
25
29
|
s.add_dependency 'amatch'
|
|
26
30
|
s.add_dependency 'to_regexp', '>=0.0.3'
|
|
27
31
|
end
|
data/test/test_cache.rb
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
require 'helper'
|
|
2
|
+
|
|
3
|
+
require 'active_support/all'
|
|
4
|
+
require 'active_record'
|
|
5
|
+
require 'cohort_scope'
|
|
6
|
+
require 'weighted_average'
|
|
7
|
+
|
|
8
|
+
ActiveRecord::Base.establish_connection(
|
|
9
|
+
'adapter' => 'mysql',
|
|
10
|
+
'database' => 'loose_tight_dictionary_test',
|
|
11
|
+
'username' => 'root',
|
|
12
|
+
'password' => 'password'
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# ActiveRecord::Base.logger = Logger.new $stderr
|
|
16
|
+
|
|
17
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
|
18
|
+
inflect.uncountable 'aircraft'
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
require 'loose_tight_dictionary/cached_result'
|
|
22
|
+
|
|
23
|
+
::LooseTightDictionary::CachedResult.setup(true)
|
|
24
|
+
::LooseTightDictionary::CachedResult.delete_all
|
|
25
|
+
|
|
26
|
+
# Test model: one row per aircraft type, keyed by its ICAO code.
class Aircraft < ActiveRecord::Base
  # `primary_key=` rather than `set_primary_key`, which ActiveRecord 4 removed.
  self.primary_key = :icao_code

  # Defines #flight_segments, #flight_segments_foreign_keys and
  # #cache_flight_segments! on this class.
  cache_loose_tight_dictionary_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description

  # Manufacturer and model joined by a space; nil parts are left out.
  def aircraft_description
    [manufacturer_name, model_name].compact.join(' ')
  end

  # Dictionary over all aircraft rows, matched on #aircraft_description.
  # Built on first call and memoized on the class, so rows added afterwards
  # are not part of it.
  def self.loose_tight_dictionary
    @loose_tight_dictionary ||= LooseTightDictionary.new all, :haystack_reader => lambda { |straw| straw.aircraft_description }
  end

  # Drops the `aircraft` table if it exists and recreates it empty.
  # The table is only dropped when present, instead of rescuing every error
  # raised by drop_table.
  def self.create_table
    connection.drop_table(:aircraft) if table_exists?
    connection.execute %{
      CREATE TABLE `aircraft` (
        `icao_code` varchar(255) DEFAULT NULL,
        `manufacturer_name` varchar(255) DEFAULT NULL,
        `model_name` varchar(255) DEFAULT NULL,
        PRIMARY KEY (`icao_code`)
      ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    }
    reset_column_information
  end
end
|
|
52
|
+
|
|
53
|
+
# Test model: one row per flight segment, carrying a free-text aircraft
# description that is matched against Aircraft.
class FlightSegment < ActiveRecord::Base
  # `primary_key=` rather than `set_primary_key`, which ActiveRecord 4 removed.
  self.primary_key = :row_hash

  # Defines #aircraft, #aircraft_foreign_keys and #cache_aircraft! on this class.
  cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description

  extend CohortScope
  self.minimum_cohort_size = 1

  # Drops the `flight_segments` table if it exists and recreates it empty.
  # The table is only dropped when present, instead of rescuing every error
  # raised by drop_table. Column information is refreshed afterwards, as
  # Aircraft.create_table does.
  def self.create_table
    connection.drop_table(:flight_segments) if table_exists?
    connection.execute %{
      CREATE TABLE `flight_segments` (
        `row_hash` varchar(255) NOT NULL DEFAULT '',
        `aircraft_description` varchar(255) DEFAULT NULL,
        `passengers` int(11) DEFAULT NULL,
        `seats` int(11) DEFAULT NULL,
        PRIMARY KEY (`row_hash`)
      ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    }
    reset_column_information
  end
end
|
|
74
|
+
|
|
75
|
+
# Rebuild both fixture tables before loading any rows.
FlightSegment.create_table
Aircraft.create_table

# One aircraft; its description reads "Boeing 747-200".
Aircraft.new.tap do |plane|
  plane.icao_code = 'B742'
  plane.manufacturer_name = 'Boeing'
  plane.model_name = '747-200'
  plane.save!
end

# Two flight segments whose descriptions are misspellings of that aircraft.
[
  ['madison to chicago', 'BORING 747200', 10, 10],
  ['madison to minneapolis', 'bing 747', 100, 5]
].each do |row_hash, description, passengers, seats|
  segment = FlightSegment.new
  segment.row_hash = row_hash
  segment.aircraft_description = description
  segment.passengers = passengers
  segment.seats = seats
  segment.save!
end

# Store the dictionary matches for every flight segment.
FlightSegment.find_each { |segment| segment.cache_aircraft! }
|
|
101
|
+
|
|
102
|
+
# Exercises the methods generated by cache_loose_tight_dictionary_matches_with
# against the fixture rows loaded above (one aircraft, two flight segments).
class TestCache < Test::Unit::TestCase
  # Both flight segments are reachable from the aircraft through the cache.
  def test_002_one_degree_of_separation
    aircraft = Aircraft.find('B742')
    assert_equal 2, aircraft.flight_segments.count
  end

  # The generated method returns a relation, so SQL aggregates work on it.
  def test_003_standard_sql_calculations
    aircraft = Aircraft.find('B742')
    assert_equal 110, aircraft.flight_segments.sum(:passengers)
  end

  def test_004_weighted_average
    aircraft = Aircraft.find('B742')
    # (10 seats * 10 passengers + 5 seats * 100 passengers) / 110 passengers
    # is 5.4545...; compare with a tolerance because the number of decimals
    # returned depends on the database.
    assert_in_delta 5.4545, aircraft.flight_segments.weighted_average(:seats, :weighted_by => :passengers), 0.0001
  end

  def test_005_right_way_to_do_cohorts
    aircraft = Aircraft.find('B742')
    assert_equal 2, FlightSegment.big_cohort(:aircraft_description => aircraft.flight_segments_foreign_keys).count
  end

  # Documents a known limitation: going from a flight segment to aircraft
  # fails because the aircraft table has no aircraft_description column.
  def test_006_you_can_get_aircraft_from_flight_segments
    fs = FlightSegment.first
    # you need to add an aircraft_description column
    assert_raises(ActiveRecord::StatementInvalid) do
      assert_equal 2, fs.aircraft.count
    end
  end
end
|
metadata
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: loose_tight_dictionary
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease:
|
|
5
|
-
version: 0.2.2
|
|
5
|
+
version: 0.2.3
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
8
8
|
- Seamus Abshere
|
|
@@ -10,7 +10,7 @@ autorequire:
|
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
12
|
|
|
13
|
-
date: 2011-05-
|
|
13
|
+
date: 2011-05-17 00:00:00 -05:00
|
|
14
14
|
default_executable:
|
|
15
15
|
dependencies:
|
|
16
16
|
- !ruby/object:Gem::Dependency
|
|
@@ -36,18 +36,18 @@ dependencies:
|
|
|
36
36
|
type: :development
|
|
37
37
|
version_requirements: *id002
|
|
38
38
|
- !ruby/object:Gem::Dependency
|
|
39
|
-
name:
|
|
39
|
+
name: activerecord
|
|
40
40
|
prerelease: false
|
|
41
41
|
requirement: &id003 !ruby/object:Gem::Requirement
|
|
42
42
|
none: false
|
|
43
43
|
requirements:
|
|
44
44
|
- - ">="
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version:
|
|
47
|
-
type: :
|
|
46
|
+
version: "3"
|
|
47
|
+
type: :development
|
|
48
48
|
version_requirements: *id003
|
|
49
49
|
- !ruby/object:Gem::Dependency
|
|
50
|
-
name:
|
|
50
|
+
name: mysql
|
|
51
51
|
prerelease: false
|
|
52
52
|
requirement: &id004 !ruby/object:Gem::Requirement
|
|
53
53
|
none: false
|
|
@@ -55,19 +55,63 @@ dependencies:
|
|
|
55
55
|
- - ">="
|
|
56
56
|
- !ruby/object:Gem::Version
|
|
57
57
|
version: "0"
|
|
58
|
-
type: :
|
|
58
|
+
type: :development
|
|
59
59
|
version_requirements: *id004
|
|
60
60
|
- !ruby/object:Gem::Dependency
|
|
61
|
-
name:
|
|
61
|
+
name: cohort_scope
|
|
62
62
|
prerelease: false
|
|
63
63
|
requirement: &id005 !ruby/object:Gem::Requirement
|
|
64
|
+
none: false
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: "0"
|
|
69
|
+
type: :development
|
|
70
|
+
version_requirements: *id005
|
|
71
|
+
- !ruby/object:Gem::Dependency
|
|
72
|
+
name: weighted_average
|
|
73
|
+
prerelease: false
|
|
74
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
|
75
|
+
none: false
|
|
76
|
+
requirements:
|
|
77
|
+
- - ">="
|
|
78
|
+
- !ruby/object:Gem::Version
|
|
79
|
+
version: "0"
|
|
80
|
+
type: :development
|
|
81
|
+
version_requirements: *id006
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: activesupport
|
|
84
|
+
prerelease: false
|
|
85
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
|
86
|
+
none: false
|
|
87
|
+
requirements:
|
|
88
|
+
- - ">="
|
|
89
|
+
- !ruby/object:Gem::Version
|
|
90
|
+
version: "3"
|
|
91
|
+
type: :runtime
|
|
92
|
+
version_requirements: *id007
|
|
93
|
+
- !ruby/object:Gem::Dependency
|
|
94
|
+
name: amatch
|
|
95
|
+
prerelease: false
|
|
96
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
|
97
|
+
none: false
|
|
98
|
+
requirements:
|
|
99
|
+
- - ">="
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: "0"
|
|
102
|
+
type: :runtime
|
|
103
|
+
version_requirements: *id008
|
|
104
|
+
- !ruby/object:Gem::Dependency
|
|
105
|
+
name: to_regexp
|
|
106
|
+
prerelease: false
|
|
107
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
|
64
108
|
none: false
|
|
65
109
|
requirements:
|
|
66
110
|
- - ">="
|
|
67
111
|
- !ruby/object:Gem::Version
|
|
68
112
|
version: 0.0.3
|
|
69
113
|
type: :runtime
|
|
70
|
-
version_requirements: *
|
|
114
|
+
version_requirements: *id009
|
|
71
115
|
description: Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.
|
|
72
116
|
email:
|
|
73
117
|
- seamus@abshere.net
|
|
@@ -104,6 +148,7 @@ files:
|
|
|
104
148
|
- examples/icao-bts.xls
|
|
105
149
|
- lib/loose_tight_dictionary.rb
|
|
106
150
|
- lib/loose_tight_dictionary/blocking.rb
|
|
151
|
+
- lib/loose_tight_dictionary/cached_result.rb
|
|
107
152
|
- lib/loose_tight_dictionary/identity.rb
|
|
108
153
|
- lib/loose_tight_dictionary/result.rb
|
|
109
154
|
- lib/loose_tight_dictionary/score.rb
|
|
@@ -114,6 +159,7 @@ files:
|
|
|
114
159
|
- loose_tight_dictionary.gemspec
|
|
115
160
|
- test/helper.rb
|
|
116
161
|
- test/test_blocking.rb
|
|
162
|
+
- test/test_cache.rb
|
|
117
163
|
- test/test_identity.rb
|
|
118
164
|
- test/test_loose_tight_dictionary.rb
|
|
119
165
|
- test/test_loose_tight_dictionary_convoluted.rb.disabled
|
|
@@ -149,6 +195,7 @@ summary: Allows iterative development of dictionaries for big data sets.
|
|
|
149
195
|
test_files:
|
|
150
196
|
- test/helper.rb
|
|
151
197
|
- test/test_blocking.rb
|
|
198
|
+
- test/test_cache.rb
|
|
152
199
|
- test/test_identity.rb
|
|
153
200
|
- test/test_loose_tight_dictionary.rb
|
|
154
201
|
- test/test_loose_tight_dictionary_convoluted.rb.disabled
|