loose_tight_dictionary 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,74 @@
1
class LooseTightDictionary
  # ActiveRecord model that stores matched pairs of values between two other
  # ActiveRecord classes. Each row holds one pair: the value in +a+ belongs to
  # the class named in +a_class+, and the value in +b+ belongs to the class
  # named in +b_class+.
  class CachedResult < ::ActiveRecord::Base
    self.table_name = 'loose_tight_dictionary_cached_results'

    # Creates the backing table with its four string columns, adds the three
    # lookup indexes, and refreshes this model's cached column information.
    def self.create_table
      connection.create_table :loose_tight_dictionary_cached_results do |t|
        t.string :a_class
        t.string :a
        t.string :b_class
        t.string :b
      end
      connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a], :name => 'aba'
      connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :b], :name => 'abb'
      connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a, :b], :name => 'abab'
      reset_column_information
    end

    # Makes sure the backing table exists.
    #
    # from_scratch - when true, an existing table is dropped first so that it
    #                is re-created empty (default: false).
    def self.setup(from_scratch = false)
      connection.drop_table :loose_tight_dictionary_cached_results if from_scratch && table_exists?
      create_table unless table_exists?
    end

    module ActiveRecordBaseExtension
      # Class-level macro that defines three instance methods on the calling
      # class, named after the other class (shown here for an other class of
      # FlightSegment):
      #
      #   flight_segments              - records of the other class whose
      #                                  options[:foreign_key] is among the
      #                                  cached values
      #   flight_segments_foreign_keys - the cached values themselves
      #   cache_flight_segments!       - stores the current matches
      #
      # other_active_record_class - name of the other class in any form that
      #                             singularizes and camelizes to its class
      #                             name (e.g. :flight_segments)
      #
      # required options:
      # :primary_key - what to call on this class
      # :foreign_key - what to call on the other class
      def cache_loose_tight_dictionary_matches_with(other_active_record_class, options)
        other = other_active_record_class.to_s.singularize.camelcase
        other_plural = other.underscore.pluralize

        # The class whose name sorts first uses the a/a_class columns and the
        # other one uses b/b_class, so both sides of a pairing read and write
        # the same rows.
        if name < other
          primary_key = :a
          foreign_key = :b
        else
          primary_key = :b
          foreign_key = :a
        end

        # def aircraft
        define_method other_plural do
          other.constantize.where options[:foreign_key] => send("#{other_plural}_foreign_keys")
        end

        # def flight_segments_foreign_keys
        define_method "#{other_plural}_foreign_keys" do
          fz = ::LooseTightDictionary::CachedResult.arel_table
          # Rows cached for this record's class, the other class, and this
          # record's own primary key value.
          conditions = fz["#{primary_key}_class".to_sym].eq(self.class.name).
            and(fz["#{foreign_key}_class".to_sym].eq(other)).
            and(fz[primary_key].eq(send(options[:primary_key])))
          sql = fz.project(fz[foreign_key]).where(conditions).to_sql
          # Go through the class: the instance-level connection accessor is
          # not available in every ActiveRecord release.
          self.class.connection.select_values sql
        end

        # def cache_aircraft!
        define_method "cache_#{other_plural}!" do
          other_class = other.constantize
          primary_key_value = send options[:primary_key]
          # NOTE(review): find_all presumably returns every record of the other
          # class that matches the given value -- confirm against
          # LooseTightDictionary#find_all.
          other_class.loose_tight_dictionary.find_all(primary_key_value).each do |other_instance|
            attrs = {
              primary_key => primary_key_value,
              "#{primary_key}_class" => self.class.name,
              foreign_key => other_instance.send(options[:foreign_key]),
              "#{foreign_key}_class" => other
            }
            # Skip pairs that are already stored.
            unless ::LooseTightDictionary::CachedResult.exists? attrs
              ::LooseTightDictionary::CachedResult.create! attrs
            end
          end
        end
      end
    end
  end
end
73
+
74
# Expose cache_loose_tight_dictionary_matches_with as a class method on
# ActiveRecord::Base.
::ActiveRecord::Base.extend(::LooseTightDictionary::CachedResult::ActiveRecordBaseExtension)
@@ -1,3 +1,3 @@
1
1
  class LooseTightDictionary
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
@@ -18,6 +18,7 @@ class LooseTightDictionary
18
18
  autoload :Wrapper, 'loose_tight_dictionary/wrapper'
19
19
  autoload :Similarity, 'loose_tight_dictionary/similarity'
20
20
  autoload :Score, 'loose_tight_dictionary/score'
21
+ autoload :CachedResult, 'loose_tight_dictionary/cached_result'
21
22
 
22
23
  class Freed < RuntimeError; end
23
24
 
@@ -21,7 +21,11 @@ Gem::Specification.new do |s|
21
21
 
22
22
  s.add_development_dependency "shoulda"
23
23
  s.add_development_dependency "remote_table"
24
- s.add_dependency 'activesupport', '>=2.3.4'
24
+ s.add_development_dependency 'activerecord', '>=3'
25
+ s.add_development_dependency 'mysql'
26
+ s.add_development_dependency 'cohort_scope'
27
+ s.add_development_dependency 'weighted_average'
28
+ s.add_dependency 'activesupport', '>=3'
25
29
  s.add_dependency 'amatch'
26
30
  s.add_dependency 'to_regexp', '>=0.0.3'
27
31
  end
@@ -0,0 +1,130 @@
1
+ require 'helper'
2
+
3
+ require 'active_support/all'
4
+ require 'active_record'
5
+ require 'cohort_scope'
6
+ require 'weighted_average'
7
+
8
# Settings for the MySQL database that this test file connects to.
connection_settings = {
  'adapter' => 'mysql',
  'database' => 'loose_tight_dictionary_test',
  'username' => 'root',
  'password' => 'password'
}
ActiveRecord::Base.establish_connection(connection_settings)

# ActiveRecord::Base.logger = Logger.new $stderr

# Register "aircraft" as uncountable so singularizing/pluralizing leaves it unchanged.
ActiveSupport::Inflector.inflections { |inflect| inflect.uncountable 'aircraft' }

require 'loose_tight_dictionary/cached_result'

# Re-create the cache table from scratch, then clear any rows in it.
::LooseTightDictionary::CachedResult.setup(true)
::LooseTightDictionary::CachedResult.delete_all
25
+
26
# Test model for aircraft, keyed by ICAO code and matched against
# FlightSegment records through a derived aircraft description.
class Aircraft < ActiveRecord::Base
  self.primary_key = 'icao_code'

  cache_loose_tight_dictionary_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description

  # Manufacturer and model joined by a space; missing parts are left out.
  def aircraft_description
    [manufacturer_name, model_name].compact.join(' ')
  end

  # Memoized dictionary built over all Aircraft records, reading each record's
  # aircraft_description as the haystack value.
  def self.loose_tight_dictionary
    @loose_tight_dictionary ||= LooseTightDictionary.new all, :haystack_reader => lambda { |straw| straw.aircraft_description }
  end

  # Drops any existing aircraft table, re-creates it, and refreshes the
  # model's cached column information.
  def self.create_table
    # IF EXISTS tolerates a missing table without hiding other errors.
    connection.execute 'DROP TABLE IF EXISTS `aircraft`'
    # The primary key column is declared NOT NULL: MySQL 5.7+ rejects
    # nullable primary key columns.
    connection.execute %{
      CREATE TABLE `aircraft` (
        `icao_code` varchar(255) NOT NULL DEFAULT '',
        `manufacturer_name` varchar(255) DEFAULT NULL,
        `model_name` varchar(255) DEFAULT NULL,
        PRIMARY KEY (`icao_code`)
      ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    }
    reset_column_information
  end
end
52
+
53
# Test model for flight segments, keyed by row_hash and matched against
# Aircraft records through the free-text aircraft_description column.
class FlightSegment < ActiveRecord::Base
  self.primary_key = 'row_hash'

  cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description

  # NOTE(review): CohortScope presumably supplies the big_cohort scope used by
  # the tests -- confirm against the cohort_scope gem.
  extend CohortScope
  self.minimum_cohort_size = 1

  # Drops any existing flight_segments table, re-creates it, and refreshes the
  # model's cached column information.
  def self.create_table
    # IF EXISTS tolerates a missing table without hiding other errors.
    connection.execute 'DROP TABLE IF EXISTS `flight_segments`'
    connection.execute %{
      CREATE TABLE `flight_segments` (
        `row_hash` varchar(255) NOT NULL DEFAULT '',
        `aircraft_description` varchar(255) DEFAULT NULL,
        `passengers` int(11) DEFAULT NULL,
        `seats` int(11) DEFAULT NULL,
        PRIMARY KEY (`row_hash`)
      ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    }
    reset_column_information
  end
end
74
+
75
FlightSegment.create_table
Aircraft.create_table

# A single aircraft record to match the flight segments against.
boeing = Aircraft.new
boeing.icao_code = 'B742'
boeing.manufacturer_name = 'Boeing'
boeing.model_name = '747-200'
boeing.save!

# Two flight segments whose descriptions are misspelled variants of the
# aircraft's description: [row_hash, aircraft_description, passengers, seats].
[
  ['madison to chicago', 'BORING 747200', 10, 10],
  ['madison to minneapolis', 'bing 747', 100, 5]
].each do |row_hash, description, passengers, seats|
  segment = FlightSegment.new
  segment.row_hash = row_hash
  segment.aircraft_description = description
  segment.passengers = passengers
  segment.seats = seats
  segment.save!
end

# Store the aircraft matches for every flight segment.
FlightSegment.find_each { |segment| segment.cache_aircraft! }
101
+
102
# Exercises the methods that cache_loose_tight_dictionary_matches_with defines
# on Aircraft and FlightSegment, using the fixture rows loaded earlier in this
# file.
class TestCache < Test::Unit::TestCase
  # Both flight segments are reachable from the aircraft through the cache.
  def test_002_one_degree_of_separation
    aircraft = Aircraft.find('B742')
    assert_equal 2, aircraft.flight_segments.count
  end

  # The result of flight_segments accepts ordinary SQL calculations.
  def test_003_standard_sql_calculations
    aircraft = Aircraft.find('B742')
    assert_equal 110, aircraft.flight_segments.sum(:passengers)
  end

  # Seats weighted by passengers: (10 * 10 + 5 * 100) / 110 = 5.4545...
  # Compared with a tolerance because the result is a floating-point value.
  def test_004_weighted_average
    aircraft = Aircraft.find('B742')
    assert_in_delta 5.4545, aircraft.flight_segments.weighted_average(:seats, :weighted_by => :passengers), 0.0001
  end

  # The cached foreign keys can be passed to a cohort constraint directly.
  def test_005_right_way_to_do_cohorts
    aircraft = Aircraft.find('B742')
    assert_equal 2, FlightSegment.big_cohort(:aircraft_description => aircraft.flight_segments_foreign_keys).count
  end

  def test_006_you_can_get_aircraft_from_flight_segments
    fs = FlightSegment.first
    # you need to add an aircraft_description column:
    # the aircraft table has none, so querying Aircraft by
    # aircraft_description is expected to fail in the database.
    assert_raises(ActiveRecord::StatementInvalid) do
      fs.aircraft.count
    end
  end
end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.2
5
+ version: 0.2.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - Seamus Abshere
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-02 00:00:00 -05:00
13
+ date: 2011-05-17 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -36,18 +36,18 @@ dependencies:
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  - !ruby/object:Gem::Dependency
39
- name: activesupport
39
+ name: activerecord
40
40
  prerelease: false
41
41
  requirement: &id003 !ruby/object:Gem::Requirement
42
42
  none: false
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.3.4
47
- type: :runtime
46
+ version: "3"
47
+ type: :development
48
48
  version_requirements: *id003
49
49
  - !ruby/object:Gem::Dependency
50
- name: amatch
50
+ name: mysql
51
51
  prerelease: false
52
52
  requirement: &id004 !ruby/object:Gem::Requirement
53
53
  none: false
@@ -55,19 +55,63 @@ dependencies:
55
55
  - - ">="
56
56
  - !ruby/object:Gem::Version
57
57
  version: "0"
58
- type: :runtime
58
+ type: :development
59
59
  version_requirements: *id004
60
60
  - !ruby/object:Gem::Dependency
61
- name: to_regexp
61
+ name: cohort_scope
62
62
  prerelease: false
63
63
  requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ type: :development
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: weighted_average
73
+ prerelease: false
74
+ requirement: &id006 !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ type: :development
81
+ version_requirements: *id006
82
+ - !ruby/object:Gem::Dependency
83
+ name: activesupport
84
+ prerelease: false
85
+ requirement: &id007 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: "3"
91
+ type: :runtime
92
+ version_requirements: *id007
93
+ - !ruby/object:Gem::Dependency
94
+ name: amatch
95
+ prerelease: false
96
+ requirement: &id008 !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: "0"
102
+ type: :runtime
103
+ version_requirements: *id008
104
+ - !ruby/object:Gem::Dependency
105
+ name: to_regexp
106
+ prerelease: false
107
+ requirement: &id009 !ruby/object:Gem::Requirement
64
108
  none: false
65
109
  requirements:
66
110
  - - ">="
67
111
  - !ruby/object:Gem::Version
68
112
  version: 0.0.3
69
113
  type: :runtime
70
- version_requirements: *id005
114
+ version_requirements: *id009
71
115
  description: Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.
72
116
  email:
73
117
  - seamus@abshere.net
@@ -104,6 +148,7 @@ files:
104
148
  - examples/icao-bts.xls
105
149
  - lib/loose_tight_dictionary.rb
106
150
  - lib/loose_tight_dictionary/blocking.rb
151
+ - lib/loose_tight_dictionary/cached_result.rb
107
152
  - lib/loose_tight_dictionary/identity.rb
108
153
  - lib/loose_tight_dictionary/result.rb
109
154
  - lib/loose_tight_dictionary/score.rb
@@ -114,6 +159,7 @@ files:
114
159
  - loose_tight_dictionary.gemspec
115
160
  - test/helper.rb
116
161
  - test/test_blocking.rb
162
+ - test/test_cache.rb
117
163
  - test/test_identity.rb
118
164
  - test/test_loose_tight_dictionary.rb
119
165
  - test/test_loose_tight_dictionary_convoluted.rb.disabled
@@ -149,6 +195,7 @@ summary: Allows iterative development of dictionaries for big data sets.
149
195
  test_files:
150
196
  - test/helper.rb
151
197
  - test/test_blocking.rb
198
+ - test/test_cache.rb
152
199
  - test/test_identity.rb
153
200
  - test/test_loose_tight_dictionary.rb
154
201
  - test/test_loose_tight_dictionary_convoluted.rb.disabled