loose_tight_dictionary 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ class LooseTightDictionary
2
+ class CachedResult < ::ActiveRecord::Base
3
+ set_table_name :loose_tight_dictionary_cached_results
4
+
5
+ def self.create_table
6
+ connection.create_table :loose_tight_dictionary_cached_results do |t|
7
+ t.string :a_class
8
+ t.string :a
9
+ t.string :b_class
10
+ t.string :b
11
+ end
12
+ connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a], :name => 'aba'
13
+ connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :b], :name => 'abb'
14
+ connection.add_index :loose_tight_dictionary_cached_results, [:a_class, :b_class, :a, :b], :name => 'abab'
15
+ reset_column_information
16
+ end
17
+
18
+ def self.setup(from_scratch = false)
19
+ connection.drop_table :loose_tight_dictionary_cached_results if from_scratch and table_exists?
20
+ create_table unless table_exists?
21
+ end
22
+
23
+ module ActiveRecordBaseExtension
24
+ # required options:
25
+ # :primary_key - what to call on this class
26
+ # :foreign_key - what to call on the other class
27
+ def cache_loose_tight_dictionary_matches_with(other_active_record_class, options)
28
+ other = other_active_record_class.to_s.singularize.camelcase
29
+ me = name
30
+ if me < other
31
+ a = me
32
+ b = other
33
+ primary_key = :a
34
+ foreign_key = :b
35
+ else
36
+ a = other
37
+ b = me
38
+ primary_key = :b
39
+ foreign_key = :a
40
+ end
41
+
42
+ # def aircraft
43
+ define_method other.underscore.pluralize do
44
+ other.constantize.where options[:foreign_key] => send("#{other.underscore.pluralize}_foreign_keys")
45
+ end
46
+
47
+ # def flight_segments_foreign_keys
48
+ define_method "#{other.underscore.pluralize}_foreign_keys" do
49
+ fz = ::LooseTightDictionary::CachedResult.arel_table
50
+ sql = fz.project(fz[foreign_key]).where(fz["#{primary_key}_class".to_sym].eq(self.class.name).and(fz["#{foreign_key}_class".to_sym].eq(other)).and(fz[primary_key].eq(send(options[:primary_key])))).to_sql
51
+ connection.select_values sql
52
+ end
53
+
54
+ # def cache_aircraft!
55
+ define_method "cache_#{other.underscore.pluralize}!" do
56
+ other_class = other.constantize
57
+ primary_key_value = send options[:primary_key]
58
+ other_class.loose_tight_dictionary.find_all(primary_key_value).each do |other_instance|
59
+ attrs = {}
60
+ attrs[primary_key] = primary_key_value
61
+ attrs["#{primary_key}_class"] = self.class.name
62
+ attrs[foreign_key] = other_instance.send options[:foreign_key]
63
+ attrs["#{foreign_key}_class"] = other
64
+ unless ::LooseTightDictionary::CachedResult.exists? attrs
65
+ ::LooseTightDictionary::CachedResult.create! attrs
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+
74
+ ::ActiveRecord::Base.extend ::LooseTightDictionary::CachedResult::ActiveRecordBaseExtension
@@ -1,3 +1,3 @@
1
1
  class LooseTightDictionary
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
@@ -18,6 +18,7 @@ class LooseTightDictionary
18
18
  autoload :Wrapper, 'loose_tight_dictionary/wrapper'
19
19
  autoload :Similarity, 'loose_tight_dictionary/similarity'
20
20
  autoload :Score, 'loose_tight_dictionary/score'
21
+ autoload :CachedResult, 'loose_tight_dictionary/cached_result'
21
22
 
22
23
  class Freed < RuntimeError; end
23
24
 
@@ -21,7 +21,11 @@ Gem::Specification.new do |s|
21
21
 
22
22
  s.add_development_dependency "shoulda"
23
23
  s.add_development_dependency "remote_table"
24
- s.add_dependency 'activesupport', '>=2.3.4'
24
+ s.add_development_dependency 'activerecord', '>=3'
25
+ s.add_development_dependency 'mysql'
26
+ s.add_development_dependency 'cohort_scope'
27
+ s.add_development_dependency 'weighted_average'
28
+ s.add_dependency 'activesupport', '>=3'
25
29
  s.add_dependency 'amatch'
26
30
  s.add_dependency 'to_regexp', '>=0.0.3'
27
31
  end
@@ -0,0 +1,130 @@
1
+ require 'helper'
2
+
3
+ require 'active_support/all'
4
+ require 'active_record'
5
+ require 'cohort_scope'
6
+ require 'weighted_average'
7
+
8
+ ActiveRecord::Base.establish_connection(
9
+ 'adapter' => 'mysql',
10
+ 'database' => 'loose_tight_dictionary_test',
11
+ 'username' => 'root',
12
+ 'password' => 'password'
13
+ )
14
+
15
+ # ActiveRecord::Base.logger = Logger.new $stderr
16
+
17
+ ActiveSupport::Inflector.inflections do |inflect|
18
+ inflect.uncountable 'aircraft'
19
+ end
20
+
21
+ require 'loose_tight_dictionary/cached_result'
22
+
23
+ ::LooseTightDictionary::CachedResult.setup(true)
24
+ ::LooseTightDictionary::CachedResult.delete_all
25
+
26
+ class Aircraft < ActiveRecord::Base
27
+ set_primary_key :icao_code
28
+
29
+ cache_loose_tight_dictionary_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
30
+
31
+ def aircraft_description
32
+ [manufacturer_name, model_name].compact.join(' ')
33
+ end
34
+
35
+ def self.loose_tight_dictionary
36
+ @loose_tight_dictionary ||= LooseTightDictionary.new all, :haystack_reader => lambda { |straw| straw.aircraft_description }
37
+ end
38
+
39
+ def self.create_table
40
+ connection.drop_table(:aircraft) rescue nil
41
+ connection.execute %{
42
+ CREATE TABLE `aircraft` (
43
+ `icao_code` varchar(255) DEFAULT NULL,
44
+ `manufacturer_name` varchar(255) DEFAULT NULL,
45
+ `model_name` varchar(255) DEFAULT NULL,
46
+ PRIMARY KEY (`icao_code`)
47
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
48
+ }
49
+ reset_column_information
50
+ end
51
+ end
52
+
53
+ class FlightSegment < ActiveRecord::Base
54
+ set_primary_key :row_hash
55
+
56
+ cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
57
+
58
+ extend CohortScope
59
+ self.minimum_cohort_size = 1
60
+
61
+ def self.create_table
62
+ connection.drop_table(:flight_segments) rescue nil
63
+ connection.execute %{
64
+ CREATE TABLE `flight_segments` (
65
+ `row_hash` varchar(255) NOT NULL DEFAULT '',
66
+ `aircraft_description` varchar(255) DEFAULT NULL,
67
+ `passengers` int(11) DEFAULT NULL,
68
+ `seats` int(11) DEFAULT NULL,
69
+ PRIMARY KEY (`row_hash`)
70
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
71
+ }
72
+ end
73
+ end
74
+
75
+ FlightSegment.create_table
76
+ Aircraft.create_table
77
+
78
+ a = Aircraft.new
79
+ a.icao_code = 'B742'
80
+ a.manufacturer_name = 'Boeing'
81
+ a.model_name = '747-200'
82
+ a.save!
83
+
84
+ fs = FlightSegment.new
85
+ fs.row_hash = 'madison to chicago'
86
+ fs.aircraft_description = 'BORING 747200'
87
+ fs.passengers = 10
88
+ fs.seats = 10
89
+ fs.save!
90
+
91
+ fs = FlightSegment.new
92
+ fs.row_hash = 'madison to minneapolis'
93
+ fs.aircraft_description = 'bing 747'
94
+ fs.passengers = 100
95
+ fs.seats = 5
96
+ fs.save!
97
+
98
+ FlightSegment.find_each do |fs|
99
+ fs.cache_aircraft!
100
+ end
101
+
102
+ class TestCache < Test::Unit::TestCase
103
+ def test_002_one_degree_of_separation
104
+ aircraft = Aircraft.find('B742')
105
+ assert_equal 2, aircraft.flight_segments.count
106
+ end
107
+
108
+ def test_003_standard_sql_calculations
109
+ aircraft = Aircraft.find('B742')
110
+ assert_equal 110, aircraft.flight_segments.sum(:passengers)
111
+ end
112
+
113
+ def test_004_weighted_average
114
+ aircraft = Aircraft.find('B742')
115
+ assert_equal 5.4545, aircraft.flight_segments.weighted_average(:seats, :weighted_by => :passengers)
116
+ end
117
+
118
+ def test_005_right_way_to_do_cohorts
119
+ aircraft = Aircraft.find('B742')
120
+ assert_equal 2, FlightSegment.big_cohort(:aircraft_description => aircraft.flight_segments_foreign_keys).count
121
+ end
122
+
123
+ def test_006_you_can_get_aircraft_from_flight_segments
124
+ fs = FlightSegment.first
125
+ # you need to add an aircraft_description column
126
+ assert_raises(ActiveRecord::StatementInvalid) do
127
+ assert_equal 2, fs.aircraft.count
128
+ end
129
+ end
130
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.2
5
+ version: 0.2.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - Seamus Abshere
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-02 00:00:00 -05:00
13
+ date: 2011-05-17 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -36,18 +36,18 @@ dependencies:
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  - !ruby/object:Gem::Dependency
39
- name: activesupport
39
+ name: activerecord
40
40
  prerelease: false
41
41
  requirement: &id003 !ruby/object:Gem::Requirement
42
42
  none: false
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.3.4
47
- type: :runtime
46
+ version: "3"
47
+ type: :development
48
48
  version_requirements: *id003
49
49
  - !ruby/object:Gem::Dependency
50
- name: amatch
50
+ name: mysql
51
51
  prerelease: false
52
52
  requirement: &id004 !ruby/object:Gem::Requirement
53
53
  none: false
@@ -55,19 +55,63 @@ dependencies:
55
55
  - - ">="
56
56
  - !ruby/object:Gem::Version
57
57
  version: "0"
58
- type: :runtime
58
+ type: :development
59
59
  version_requirements: *id004
60
60
  - !ruby/object:Gem::Dependency
61
- name: to_regexp
61
+ name: cohort_scope
62
62
  prerelease: false
63
63
  requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ type: :development
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: weighted_average
73
+ prerelease: false
74
+ requirement: &id006 !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ type: :development
81
+ version_requirements: *id006
82
+ - !ruby/object:Gem::Dependency
83
+ name: activesupport
84
+ prerelease: false
85
+ requirement: &id007 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: "3"
91
+ type: :runtime
92
+ version_requirements: *id007
93
+ - !ruby/object:Gem::Dependency
94
+ name: amatch
95
+ prerelease: false
96
+ requirement: &id008 !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: "0"
102
+ type: :runtime
103
+ version_requirements: *id008
104
+ - !ruby/object:Gem::Dependency
105
+ name: to_regexp
106
+ prerelease: false
107
+ requirement: &id009 !ruby/object:Gem::Requirement
64
108
  none: false
65
109
  requirements:
66
110
  - - ">="
67
111
  - !ruby/object:Gem::Version
68
112
  version: 0.0.3
69
113
  type: :runtime
70
- version_requirements: *id005
114
+ version_requirements: *id009
71
115
  description: Create dictionaries that link rows between two tables using loose matching (string similarity) by default and tight matching (regexp) by request.
72
116
  email:
73
117
  - seamus@abshere.net
@@ -104,6 +148,7 @@ files:
104
148
  - examples/icao-bts.xls
105
149
  - lib/loose_tight_dictionary.rb
106
150
  - lib/loose_tight_dictionary/blocking.rb
151
+ - lib/loose_tight_dictionary/cached_result.rb
107
152
  - lib/loose_tight_dictionary/identity.rb
108
153
  - lib/loose_tight_dictionary/result.rb
109
154
  - lib/loose_tight_dictionary/score.rb
@@ -114,6 +159,7 @@ files:
114
159
  - loose_tight_dictionary.gemspec
115
160
  - test/helper.rb
116
161
  - test/test_blocking.rb
162
+ - test/test_cache.rb
117
163
  - test/test_identity.rb
118
164
  - test/test_loose_tight_dictionary.rb
119
165
  - test/test_loose_tight_dictionary_convoluted.rb.disabled
@@ -149,6 +195,7 @@ summary: Allows iterative development of dictionaries for big data sets.
149
195
  test_files:
150
196
  - test/helper.rb
151
197
  - test/test_blocking.rb
198
+ - test/test_cache.rb
152
199
  - test/test_identity.rb
153
200
  - test/test_loose_tight_dictionary.rb
154
201
  - test/test_loose_tight_dictionary_convoluted.rb.disabled