loose_tight_dictionary 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/loose_tight_dictionary.rb +13 -4
- data/loose_tight_dictionary.gemspec +71 -0
- data/test/helper.rb +2 -2
- data/test/test_loose_tight_dictionary.rb +7 -4
- metadata +24 -22
data/Rakefile
CHANGED
|
@@ -13,7 +13,6 @@ begin
|
|
|
13
13
|
gem.add_development_dependency "shoulda"
|
|
14
14
|
gem.add_development_dependency "remote_table", ">=0.2.19"
|
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
|
16
|
-
gem.add_dependency 'fastercsv', '>=1.5.3'
|
|
17
16
|
gem.add_dependency 'andand', '>=1.3.1'
|
|
18
17
|
gem.add_dependency 'amatch', '>=0.2.5'
|
|
19
18
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.8
|
|
1
|
+
0.0.9
|
|
@@ -7,7 +7,16 @@ require 'active_support/version'
|
|
|
7
7
|
end if ActiveSupport::VERSION::MAJOR == 3
|
|
8
8
|
require 'amatch'
|
|
9
9
|
require 'andand'
|
|
10
|
-
|
|
10
|
+
if RUBY_VERSION >= '1.9'
|
|
11
|
+
require 'csv'
|
|
12
|
+
else
|
|
13
|
+
begin
|
|
14
|
+
require 'fastercsv'
|
|
15
|
+
rescue LoadError
|
|
16
|
+
$stderr.puts "[loose_tight_dictionary gem] You probably need to manually install the fastercsv gem."
|
|
17
|
+
raise $!
|
|
18
|
+
end
|
|
19
|
+
end
|
|
11
20
|
|
|
12
21
|
class LooseTightDictionary
|
|
13
22
|
class MissedChecks < RuntimeError; end
|
|
@@ -92,11 +101,11 @@ class LooseTightDictionary
|
|
|
92
101
|
|
|
93
102
|
if positive_record = positives.andand.detect { |record| record[0] == left }
|
|
94
103
|
correct_right = positive_record[1]
|
|
95
|
-
if correct_right.
|
|
104
|
+
if correct_right.present? and right.blank?
|
|
96
105
|
logger.andand.debug " Mismatch! (should match SOMETHING)"
|
|
97
106
|
raise Mismatch
|
|
98
107
|
elsif right != correct_right
|
|
99
|
-
logger.andand.debug " Mismatch! (should be #{correct_right})"
|
|
108
|
+
logger.andand.debug " Mismatch! (#{right} should be #{correct_right})"
|
|
100
109
|
raise Mismatch
|
|
101
110
|
end
|
|
102
111
|
end
|
|
@@ -107,7 +116,7 @@ class LooseTightDictionary
|
|
|
107
116
|
logger.andand.debug " False positive! (should NOT match ANYTHING)"
|
|
108
117
|
raise FalsePositive
|
|
109
118
|
elsif right == incorrect_right
|
|
110
|
-
logger.andand.debug " False positive! (should NOT be #{incorrect_right})"
|
|
119
|
+
logger.andand.debug " False positive! (#{right} should NOT be #{incorrect_right})"
|
|
111
120
|
raise FalsePositive
|
|
112
121
|
end
|
|
113
122
|
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = %q{loose_tight_dictionary}
|
|
8
|
+
s.version = "0.0.9"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["Seamus Abshere"]
|
|
12
|
+
s.date = %q{2010-09-30}
|
|
13
|
+
s.description = %q{Create dictionaries that link rows between two tables (left and right) using loose matching (string similarity) by default and tight matching (regexp) by request.}
|
|
14
|
+
s.email = %q{seamus@abshere.net}
|
|
15
|
+
s.extra_rdoc_files = [
|
|
16
|
+
"LICENSE",
|
|
17
|
+
"README.rdoc"
|
|
18
|
+
]
|
|
19
|
+
s.files = [
|
|
20
|
+
".document",
|
|
21
|
+
".gitignore",
|
|
22
|
+
"LICENSE",
|
|
23
|
+
"README.rdoc",
|
|
24
|
+
"Rakefile",
|
|
25
|
+
"VERSION",
|
|
26
|
+
"examples/first_name_matching.rb",
|
|
27
|
+
"examples/icao-bts.rb",
|
|
28
|
+
"examples/icao-bts.xls",
|
|
29
|
+
"lib/loose_tight_dictionary.rb",
|
|
30
|
+
"loose_tight_dictionary.gemspec",
|
|
31
|
+
"test/helper.rb",
|
|
32
|
+
"test/test_loose_tight_dictionary.rb"
|
|
33
|
+
]
|
|
34
|
+
s.homepage = %q{http://github.com/seamusabshere/loose_tight_dictionary}
|
|
35
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
|
36
|
+
s.require_paths = ["lib"]
|
|
37
|
+
s.rubygems_version = %q{1.3.7}
|
|
38
|
+
s.summary = %q{Allows iterative development of dictionaries for big data sets.}
|
|
39
|
+
s.test_files = [
|
|
40
|
+
"test/helper.rb",
|
|
41
|
+
"test/test_loose_tight_dictionary.rb",
|
|
42
|
+
"examples/first_name_matching.rb",
|
|
43
|
+
"examples/icao-bts.rb"
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
if s.respond_to? :specification_version then
|
|
47
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
48
|
+
s.specification_version = 3
|
|
49
|
+
|
|
50
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
51
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
|
52
|
+
s.add_development_dependency(%q<remote_table>, [">= 0.2.19"])
|
|
53
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
54
|
+
s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
|
|
55
|
+
s.add_runtime_dependency(%q<amatch>, [">= 0.2.5"])
|
|
56
|
+
else
|
|
57
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
|
58
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.19"])
|
|
59
|
+
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
60
|
+
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
|
61
|
+
s.add_dependency(%q<amatch>, [">= 0.2.5"])
|
|
62
|
+
end
|
|
63
|
+
else
|
|
64
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
|
65
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.19"])
|
|
66
|
+
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
67
|
+
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
|
68
|
+
s.add_dependency(%q<amatch>, [">= 0.2.5"])
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
data/test/helper.rb
CHANGED
|
@@ -2,11 +2,11 @@ require 'rubygems'
|
|
|
2
2
|
require 'test/unit'
|
|
3
3
|
require 'shoulda'
|
|
4
4
|
require 'logger'
|
|
5
|
-
require 'ruby-debug'
|
|
5
|
+
# require 'ruby-debug'
|
|
6
6
|
|
|
7
7
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
8
8
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
9
|
-
require 'loose_tight_dictionary'
|
|
9
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'loose_tight_dictionary'))
|
|
10
10
|
|
|
11
11
|
class Test::Unit::TestCase
|
|
12
12
|
end
|
|
@@ -3,7 +3,7 @@ require 'helper'
|
|
|
3
3
|
require 'remote_table'
|
|
4
4
|
|
|
5
5
|
# $logger = Logger.new STDERR
|
|
6
|
-
# $logger.level = Logger::
|
|
6
|
+
# $logger.level = Logger::DEBUG
|
|
7
7
|
# $tee = STDOUT
|
|
8
8
|
|
|
9
9
|
class TestLooseTightDictionary < Test::Unit::TestCase
|
|
@@ -225,12 +225,15 @@ class TestLooseTightDictionary < Test::Unit::TestCase
|
|
|
225
225
|
ltd.check @left
|
|
226
226
|
end
|
|
227
227
|
end
|
|
228
|
-
|
|
228
|
+
|
|
229
229
|
should "use a Google Docs spreadsheet as a source of tightenings" do
|
|
230
230
|
@positives.push [ @d_left[0], @d_right[0] ]
|
|
231
231
|
@tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
|
|
232
|
-
|
|
233
|
-
|
|
232
|
+
|
|
233
|
+
# sabshere 9/30/10 this shouldn't raise anything
|
|
234
|
+
# but the tightenings have been changed... we should be using test-only tightenings, not production ones
|
|
235
|
+
# assert_nothing_raised do
|
|
236
|
+
assert_raises(LooseTightDictionary::Mismatch) do
|
|
234
237
|
ltd.check @left
|
|
235
238
|
end
|
|
236
239
|
end
|
metadata
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: loose_tight_dictionary
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 13
|
|
4
5
|
prerelease: false
|
|
5
6
|
segments:
|
|
6
7
|
- 0
|
|
7
8
|
- 0
|
|
8
|
-
- 8
|
|
9
|
-
version: 0.0.8
|
|
9
|
+
- 9
|
|
10
|
+
version: 0.0.9
|
|
10
11
|
platform: ruby
|
|
11
12
|
authors:
|
|
12
13
|
- Seamus Abshere
|
|
@@ -14,16 +15,18 @@ autorequire:
|
|
|
14
15
|
bindir: bin
|
|
15
16
|
cert_chain: []
|
|
16
17
|
|
|
17
|
-
date: 2010-
|
|
18
|
+
date: 2010-09-30 00:00:00 -05:00
|
|
18
19
|
default_executable:
|
|
19
20
|
dependencies:
|
|
20
21
|
- !ruby/object:Gem::Dependency
|
|
21
22
|
name: shoulda
|
|
22
23
|
prerelease: false
|
|
23
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
24
26
|
requirements:
|
|
25
27
|
- - ">="
|
|
26
28
|
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 3
|
|
27
30
|
segments:
|
|
28
31
|
- 0
|
|
29
32
|
version: "0"
|
|
@@ -33,9 +36,11 @@ dependencies:
|
|
|
33
36
|
name: remote_table
|
|
34
37
|
prerelease: false
|
|
35
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
36
40
|
requirements:
|
|
37
41
|
- - ">="
|
|
38
42
|
- !ruby/object:Gem::Version
|
|
43
|
+
hash: 49
|
|
39
44
|
segments:
|
|
40
45
|
- 0
|
|
41
46
|
- 2
|
|
@@ -47,9 +52,11 @@ dependencies:
|
|
|
47
52
|
name: activesupport
|
|
48
53
|
prerelease: false
|
|
49
54
|
requirement: &id003 !ruby/object:Gem::Requirement
|
|
55
|
+
none: false
|
|
50
56
|
requirements:
|
|
51
57
|
- - ">="
|
|
52
58
|
- !ruby/object:Gem::Version
|
|
59
|
+
hash: 11
|
|
53
60
|
segments:
|
|
54
61
|
- 2
|
|
55
62
|
- 3
|
|
@@ -57,48 +64,38 @@ dependencies:
|
|
|
57
64
|
version: 2.3.4
|
|
58
65
|
type: :runtime
|
|
59
66
|
version_requirements: *id003
|
|
60
|
-
- !ruby/object:Gem::Dependency
|
|
61
|
-
name: fastercsv
|
|
62
|
-
prerelease: false
|
|
63
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
|
64
|
-
requirements:
|
|
65
|
-
- - ">="
|
|
66
|
-
- !ruby/object:Gem::Version
|
|
67
|
-
segments:
|
|
68
|
-
- 1
|
|
69
|
-
- 5
|
|
70
|
-
- 3
|
|
71
|
-
version: 1.5.3
|
|
72
|
-
type: :runtime
|
|
73
|
-
version_requirements: *id004
|
|
74
67
|
- !ruby/object:Gem::Dependency
|
|
75
68
|
name: andand
|
|
76
69
|
prerelease: false
|
|
77
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
|
70
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
|
71
|
+
none: false
|
|
78
72
|
requirements:
|
|
79
73
|
- - ">="
|
|
80
74
|
- !ruby/object:Gem::Version
|
|
75
|
+
hash: 25
|
|
81
76
|
segments:
|
|
82
77
|
- 1
|
|
83
78
|
- 3
|
|
84
79
|
- 1
|
|
85
80
|
version: 1.3.1
|
|
86
81
|
type: :runtime
|
|
87
|
-
version_requirements: *id005
|
|
82
|
+
version_requirements: *id004
|
|
88
83
|
- !ruby/object:Gem::Dependency
|
|
89
84
|
name: amatch
|
|
90
85
|
prerelease: false
|
|
91
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
|
86
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
|
87
|
+
none: false
|
|
92
88
|
requirements:
|
|
93
89
|
- - ">="
|
|
94
90
|
- !ruby/object:Gem::Version
|
|
91
|
+
hash: 29
|
|
95
92
|
segments:
|
|
96
93
|
- 0
|
|
97
94
|
- 2
|
|
98
95
|
- 5
|
|
99
96
|
version: 0.2.5
|
|
100
97
|
type: :runtime
|
|
101
|
-
version_requirements: *id006
|
|
98
|
+
version_requirements: *id005
|
|
102
99
|
description: Create dictionaries that link rows between two tables (left and right) using loose matching (string similarity) by default and tight matching (regexp) by request.
|
|
103
100
|
email: seamus@abshere.net
|
|
104
101
|
executables: []
|
|
@@ -119,6 +116,7 @@ files:
|
|
|
119
116
|
- examples/icao-bts.rb
|
|
120
117
|
- examples/icao-bts.xls
|
|
121
118
|
- lib/loose_tight_dictionary.rb
|
|
119
|
+
- loose_tight_dictionary.gemspec
|
|
122
120
|
- test/helper.rb
|
|
123
121
|
- test/test_loose_tight_dictionary.rb
|
|
124
122
|
has_rdoc: true
|
|
@@ -131,23 +129,27 @@ rdoc_options:
|
|
|
131
129
|
require_paths:
|
|
132
130
|
- lib
|
|
133
131
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
132
|
+
none: false
|
|
134
133
|
requirements:
|
|
135
134
|
- - ">="
|
|
136
135
|
- !ruby/object:Gem::Version
|
|
136
|
+
hash: 3
|
|
137
137
|
segments:
|
|
138
138
|
- 0
|
|
139
139
|
version: "0"
|
|
140
140
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
141
|
+
none: false
|
|
141
142
|
requirements:
|
|
142
143
|
- - ">="
|
|
143
144
|
- !ruby/object:Gem::Version
|
|
145
|
+
hash: 3
|
|
144
146
|
segments:
|
|
145
147
|
- 0
|
|
146
148
|
version: "0"
|
|
147
149
|
requirements: []
|
|
148
150
|
|
|
149
151
|
rubyforge_project:
|
|
150
|
-
rubygems_version: 1.3.
|
|
152
|
+
rubygems_version: 1.3.7
|
|
151
153
|
signing_key:
|
|
152
154
|
specification_version: 3
|
|
153
155
|
summary: Allows iterative development of dictionaries for big data sets.
|