errata 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/CHANGELOG +14 -0
- data/Gemfile +10 -2
- data/README.md +181 -0
- data/Rakefile +6 -14
- data/errata.gemspec +1 -5
- data/lib/errata.rb +75 -29
- data/lib/errata/erratum.rb +39 -45
- data/lib/errata/erratum/delete.rb +9 -2
- data/lib/errata/erratum/replace.rb +9 -2
- data/lib/errata/erratum/simplify.rb +7 -2
- data/lib/errata/erratum/transform.rb +7 -4
- data/lib/errata/erratum/truncate.rb +5 -2
- data/lib/errata/version.rb +1 -1
- data/test/helper.rb +13 -9
- data/test/{test_old_syntax.rb → models.rb} +0 -41
- data/test/test_errata.rb +25 -0
- data/test/test_old_style.rb +41 -0
- metadata +64 -104
- data/README.rdoc +0 -35
data/.gitignore
CHANGED
data/CHANGELOG
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
1.1.0 / 2012-05-03
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Errata#options is no longer available for introspection. Use Errata#lazy_load_table_options and Errata#responder if that helps.
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7+
|
10
|
+
* Modernize Rakefile and test helper
|
11
|
+
* Got rid of autoload and made thread-safe in general
|
12
|
+
* Convert to minitest
|
13
|
+
* Strictly correct lazy-loading
|
14
|
+
* Added warnings for deprecated usage styles.
|
data/Gemfile
CHANGED
@@ -1,4 +1,12 @@
|
|
1
|
-
source
|
1
|
+
source :rubygems
|
2
2
|
|
3
|
-
# Specify your gem's dependencies in errata.gemspec
|
4
3
|
gemspec
|
4
|
+
|
5
|
+
# development dependencies
|
6
|
+
gem 'minitest'
|
7
|
+
gem 'minitest-reporters'
|
8
|
+
gem 'rake'
|
9
|
+
gem 'yard'
|
10
|
+
unless RUBY_VERSION >= '1.9'
|
11
|
+
gem 'fastercsv'
|
12
|
+
end
|
data/README.md
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# errata
|
2
|
+
|
3
|
+
Correct strings based on remote errata files.
|
4
|
+
|
5
|
+
# Example
|
6
|
+
|
7
|
+
Every errata has a table structure based on the [IETF RFC Editor's "How to Report Errata"](http://www.rfc-editor.org/how_to_report.html).
|
8
|
+
|
9
|
+
<table>
|
10
|
+
<tr>
|
11
|
+
<th>date</th>
|
12
|
+
<th>name</th>
|
13
|
+
<th>email</th>
|
14
|
+
<th>type</th>
|
15
|
+
<th>section</th>
|
16
|
+
<th>action</th>
|
17
|
+
<th>x</th>
|
18
|
+
<th>y</th>
|
19
|
+
<th>condition</th>
|
20
|
+
<th>notes</th>
|
21
|
+
</tr>
|
22
|
+
<tr>
|
23
|
+
<td>2011-03-22</td>
|
24
|
+
<td>Ian Hough</td>
|
25
|
+
<td>ian@brighterplanet.com</td>
|
26
|
+
<td>meta</td>
|
27
|
+
<td>Intended use</td>
|
28
|
+
<td></td>
|
29
|
+
<td>http://example.com/original-data-with-errors.xls</td>
|
30
|
+
<td></td>
|
31
|
+
<td></td>
|
32
|
+
<td>A hypothetical document that uses non-ISO country names</td>
|
33
|
+
</tr>
|
34
|
+
<tr>
|
35
|
+
<td>2011-03-22</td>
|
36
|
+
<td>Ian Hough</td>
|
37
|
+
<td>ian@brighterplanet.com</td>
|
38
|
+
<td>technical</td>
|
39
|
+
<td>Country Name</td>
|
40
|
+
<td>replace</td>
|
41
|
+
<td>/ANTIGUA & BARBUDA/</td>
|
42
|
+
<td>ANTIGUA AND BARBUDA</td>
|
43
|
+
<td></td>
|
44
|
+
<td></td>
|
45
|
+
</tr>
|
46
|
+
<tr>
|
47
|
+
<td>2011-03-22</td>
|
48
|
+
<td>Ian Hough</td>
|
49
|
+
<td>ian@brighterplanet.com</td>
|
50
|
+
<td>technical</td>
|
51
|
+
<td>Country Name</td>
|
52
|
+
<td>replace</td>
|
53
|
+
<td>/BOLIVIA/</td>
|
54
|
+
<td>BOLIVIA, PLURINATIONAL STATE OF</td>
|
55
|
+
<td></td>
|
56
|
+
<td></td>
|
57
|
+
</tr>
|
58
|
+
<tr>
|
59
|
+
<td>2011-03-22</td>
|
60
|
+
<td>Ian Hough</td>
|
61
|
+
<td>ian@brighterplanet.com</td>
|
62
|
+
<td>technical</td>
|
63
|
+
<td>Country Name</td>
|
64
|
+
<td>replace</td>
|
65
|
+
<td>/BOSNIA & HERZEGOVINA/</td>
|
66
|
+
<td>BOSNIA AND HERZEGOVINA</td>
|
67
|
+
<td></td>
|
68
|
+
<td></td>
|
69
|
+
</tr>
|
70
|
+
<tr>
|
71
|
+
<td>2011-03-22</td>
|
72
|
+
<td>Ian Hough</td>
|
73
|
+
<td>ian@brighterplanet.com</td>
|
74
|
+
<td>technical</td>
|
75
|
+
<td>Country Name</td>
|
76
|
+
<td>replace</td>
|
77
|
+
<td>/BRITISH VIRGIN ISLANDS/</td>
|
78
|
+
<td>VIRGIN ISLANDS, BRITISH</td>
|
79
|
+
<td></td>
|
80
|
+
<td></td>
|
81
|
+
</tr>
|
82
|
+
<tr>
|
83
|
+
<td>2011-03-22</td>
|
84
|
+
<td>Ian Hough</td>
|
85
|
+
<td>ian@brighterplanet.com</td>
|
86
|
+
<td>technical</td>
|
87
|
+
<td>Country Name</td>
|
88
|
+
<td>replace</td>
|
89
|
+
<td>/COTE D'IVOIRE/</td>
|
90
|
+
<td>CÔTE D'IVOIRE</td>
|
91
|
+
<td></td>
|
92
|
+
<td></td>
|
93
|
+
</tr>
|
94
|
+
<tr>
|
95
|
+
<td>2011-03-22</td>
|
96
|
+
<td>Ian Hough</td>
|
97
|
+
<td>ian@brighterplanet.com</td>
|
98
|
+
<td>technical</td>
|
99
|
+
<td>Country Name</td>
|
100
|
+
<td>replace</td>
|
101
|
+
<td>/DEM\. PEOPLE'S REP\. OF KOREA/</td>
|
102
|
+
<td>KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF</td>
|
103
|
+
<td></td>
|
104
|
+
<td></td>
|
105
|
+
</tr>
|
106
|
+
<tr>
|
107
|
+
<td>2011-03-22</td>
|
108
|
+
<td>Ian Hough</td>
|
109
|
+
<td>ian@brighterplanet.com</td>
|
110
|
+
<td>technical</td>
|
111
|
+
<td>Country Name</td>
|
112
|
+
<td>replace</td>
|
113
|
+
<td>/DEM\. REP\. OF THE CONGO/</td>
|
114
|
+
<td>CONGO, THE DEMOCRATIC REPUBLIC OF THE</td>
|
115
|
+
<td></td>
|
116
|
+
<td></td>
|
117
|
+
</tr>
|
118
|
+
<tr>
|
119
|
+
<td>2011-03-22</td>
|
120
|
+
<td>Ian Hough</td>
|
121
|
+
<td>ian@brighterplanet.com</td>
|
122
|
+
<td>technical</td>
|
123
|
+
<td>Country Name</td>
|
124
|
+
<td>replace</td>
|
125
|
+
<td>/HONG KONG SAR/</td>
|
126
|
+
<td>HONG KONG</td>
|
127
|
+
<td></td>
|
128
|
+
<td></td>
|
129
|
+
</tr>
|
130
|
+
<tr>
|
131
|
+
<td>2011-03-22</td>
|
132
|
+
<td>Ian Hough</td>
|
133
|
+
<td>ian@brighterplanet.com</td>
|
134
|
+
<td>technical</td>
|
135
|
+
<td>Country Name</td>
|
136
|
+
<td>replace</td>
|
137
|
+
<td>/IRAN \(ISLAMIC REPUBLIC OF\)/</td>
|
138
|
+
<td>IRAN, ISLAMIC REPUBLIC OF</td>
|
139
|
+
<td></td>
|
140
|
+
<td></td>
|
141
|
+
</tr>
|
142
|
+
</table>
|
143
|
+
|
144
|
+
Which would be saved as a CSV:
|
145
|
+
|
146
|
+
date,name,email,type,section,action,x,y,condition,notes
|
147
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,meta,Intended use,,http://example.com/original-data-with-errors.xls,,A hypothetical document that uses non-ISO country names
|
148
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/ANTIGUA & BARBUDA/,ANTIGUA AND BARBUDA,,
|
149
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOLIVIA/,"BOLIVIA, PLURINATIONAL STATE OF",,
|
150
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BOSNIA & HERZEGOVINA/,BOSNIA AND HERZEGOVINA,,
|
151
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/BRITISH VIRGIN ISLANDS/,"VIRGIN ISLANDS, BRITISH",,
|
152
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/COTE D'IVOIRE/,CÔTE D'IVOIRE,,
|
153
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. PEOPLE'S REP\. OF KOREA/,"KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",,
|
154
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/DEM\. REP\. OF THE CONGO/,"CONGO, THE DEMOCRATIC REPUBLIC OF THE",,
|
155
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/HONG KONG SAR/,HONG KONG,,
|
156
|
+
2011-03-22,Ian Hough,ian@brighterplanet.com,technical,Country Name,replace,/IRAN \(ISLAMIC REPUBLIC OF\)/,"IRAN, ISLAMIC REPUBLIC OF",,
|
157
|
+
|
158
|
+
And then used
|
159
|
+
|
160
|
+
errata = Errata.new(:url => 'http://example.com/errata.csv')
|
161
|
+
original = RemoteTable.new(:url => 'http://example.com/original-data-with-errors.xls')
|
162
|
+
original.each do |row|
|
163
|
+
errata.correct! row # destructively correct each row
|
164
|
+
end
|
165
|
+
|
166
|
+
## UTF-8
|
167
|
+
|
168
|
+
Assumes all input strings are UTF-8. Otherwise there can be problems with Ruby 1.9 and Regexp::FIXEDENCODING. Specifically, ASCII-8BIT regexps might be applied to UTF-8 strings (or vice-versa), resulting in Encoding::CompatibilityError.
|
169
|
+
|
170
|
+
## Real-life usage
|
171
|
+
|
172
|
+
Used by [data_miner](http://github.com/seamusabshere/data_miner)
|
173
|
+
|
174
|
+
## Authors
|
175
|
+
|
176
|
+
* Seamus Abshere <seamus@abshere.net>
|
177
|
+
* Andy Rossmeissl <andy@rossmeissl.net>
|
178
|
+
|
179
|
+
## Copyright
|
180
|
+
|
181
|
+
Copyright (c) 2011 Brighter Planet. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,25 +1,17 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
3
|
|
4
4
|
require 'rake'
|
5
5
|
require 'rake/testtask'
|
6
6
|
Rake::TestTask.new(:test) do |test|
|
7
|
-
test.libs << '
|
7
|
+
test.libs << 'test'
|
8
8
|
test.pattern = 'test/**/test_*.rb'
|
9
9
|
test.verbose = true
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
rdoc.rdoc_dir = 'rdoc'
|
16
|
-
rdoc.title = 'errata'
|
17
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
18
|
-
rdoc.rdoc_files.include('README*')
|
19
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
20
|
-
end
|
21
|
-
rescue LoadError
|
22
|
-
puts "Rdoc is not available"
|
12
|
+
require 'yard'
|
13
|
+
YARD::Rake::YardocTask.new do |y|
|
14
|
+
y.options << '--no-private'
|
23
15
|
end
|
24
16
|
|
25
17
|
task :default => :test
|
data/errata.gemspec
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require "errata/version"
|
2
|
+
require File.expand_path("../lib/errata/version", __FILE__)
|
4
3
|
|
5
4
|
Gem::Specification.new do |s|
|
6
5
|
s.name = "errata"
|
7
6
|
s.version = Errata::VERSION
|
8
|
-
s.platform = Gem::Platform::RUBY
|
9
7
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
10
8
|
s.email = ["seamus@abshere.net"]
|
11
9
|
s.homepage = "https://github.com/seamusabshere/errata"
|
@@ -22,6 +20,4 @@ Gem::Specification.new do |s|
|
|
22
20
|
s.add_dependency 'activesupport', '>=2.3.4'
|
23
21
|
s.add_dependency 'remote_table', '>=1.1.7'
|
24
22
|
s.add_dependency 'to_regexp', '>= 0.0.2'
|
25
|
-
s.add_development_dependency 'test-unit'
|
26
|
-
s.add_development_dependency 'shoulda'
|
27
23
|
end
|
data/lib/errata.rb
CHANGED
@@ -1,33 +1,47 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
1
3
|
require 'active_support'
|
2
4
|
require 'active_support/version'
|
3
|
-
|
4
|
-
active_support/core_ext
|
5
|
-
|
6
|
-
}.each do |active_support_3_requirement|
|
7
|
-
require active_support_3_requirement
|
8
|
-
end if ::ActiveSupport::VERSION::MAJOR == 3
|
5
|
+
if ::ActiveSupport::VERSION::MAJOR >= 3
|
6
|
+
require 'active_support/core_ext'
|
7
|
+
end
|
9
8
|
require 'remote_table'
|
10
9
|
|
10
|
+
require 'errata/erratum'
|
11
|
+
|
11
12
|
class Errata
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
CORRECTIONS = %w{delete replace simplify transform truncate reject}
|
14
|
+
|
15
|
+
attr_reader :lazy_load_table_options
|
16
|
+
attr_reader :lazy_load_responder_class_name
|
15
17
|
|
16
|
-
attr_reader :options
|
17
|
-
|
18
18
|
# Arguments
|
19
|
-
# * <tt
|
20
|
-
# * <tt
|
21
|
-
# If and only if you don't pass <tt
|
19
|
+
# * <tt>:responder</tt> (required) - normally you pass this something like Guru.new, which should respond to questions like #is_a_bentley?. If you pass a string, it will be lazily constantized and a new object initialized from it; for example, 'Guru' will lead to 'Guru'.constantize.new.
|
20
|
+
# * <tt>:table</tt> - takes something that acts like a RemoteTable
|
21
|
+
# If and only if you don't pass <tt>:table</tt>, all other options will be passed to a new RemoteTable (for example, <tt>:url</tt>, etc.)
|
22
22
|
def initialize(options = {})
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
23
|
+
options = options.symbolize_keys
|
24
|
+
|
25
|
+
responder = options.delete :responder
|
26
|
+
raise "[errata] :responder is required" unless responder
|
27
|
+
if responder.is_a?(::String)
|
28
|
+
@lazy_load_responder_mutex = ::Mutex.new
|
29
|
+
@lazy_load_responder_class_name = responder
|
30
|
+
else
|
31
|
+
::Kernel.warn %{[errata] Passing an object as :responder is deprecated. It's recommended to pass a class name instead, which will be constantized and instantiated with no arguments.}
|
32
|
+
@responder = responder
|
33
|
+
end
|
34
|
+
|
35
|
+
if table = options.delete(:table)
|
36
|
+
::Kernel.warn %{[errata] Passing :table is deprecated. It's recommended to pass table options instead.}
|
37
|
+
@table = table
|
38
|
+
else
|
39
|
+
@lazy_load_table_options = options
|
40
|
+
end
|
41
|
+
|
42
|
+
@set_rejections_and_corrections_mutex = ::Mutex.new
|
29
43
|
end
|
30
|
-
|
44
|
+
|
31
45
|
def rejects?(row)
|
32
46
|
rejections.any? { |erratum| erratum.targets?(row) }
|
33
47
|
end
|
@@ -36,21 +50,53 @@ class Errata
|
|
36
50
|
corrections.each { |erratum| erratum.correct!(row) }
|
37
51
|
nil
|
38
52
|
end
|
39
|
-
|
40
|
-
def
|
41
|
-
@
|
42
|
-
|
43
|
-
|
53
|
+
|
54
|
+
def responder
|
55
|
+
@responder || @lazy_load_responder_mutex.synchronize do
|
56
|
+
@responder ||= lazy_load_responder_class_name.constantize.new
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
def set_rejections_and_corrections!
|
63
|
+
return if @set_rejections_and_corrections == true
|
64
|
+
@set_rejections_and_corrections_mutex.synchronize do
|
65
|
+
return if @set_rejections_and_corrections == true
|
66
|
+
|
67
|
+
if @table
|
68
|
+
table = @table
|
69
|
+
@table = nil # won't need this again
|
70
|
+
else
|
71
|
+
table = ::RemoteTable.new lazy_load_table_options
|
72
|
+
end
|
73
|
+
|
74
|
+
rejections = []
|
75
|
+
corrections = []
|
76
|
+
|
77
|
+
table.each do |erratum_initializer|
|
78
|
+
erratum_initializer = erratum_initializer.symbolize_keys
|
79
|
+
action = erratum_initializer[:action].downcase
|
80
|
+
if action == 'reject'
|
81
|
+
rejections << Erratum::Reject.new(responder, erratum_initializer)
|
82
|
+
elsif CORRECTIONS.include?(action)
|
83
|
+
corrections << Erratum.const_get(action.camelcase).new(responder, erratum_initializer)
|
84
|
+
end
|
44
85
|
end
|
45
|
-
|
86
|
+
|
87
|
+
@rejections = rejections
|
88
|
+
@corrections = corrections
|
89
|
+
@set_rejections_and_corrections = true
|
46
90
|
end
|
47
91
|
end
|
48
92
|
|
49
93
|
def rejections
|
50
|
-
|
94
|
+
set_rejections_and_corrections!
|
95
|
+
@rejections
|
51
96
|
end
|
52
97
|
|
53
98
|
def corrections
|
54
|
-
|
99
|
+
set_rejections_and_corrections!
|
100
|
+
@corrections
|
55
101
|
end
|
56
102
|
end
|
data/lib/errata/erratum.rb
CHANGED
@@ -1,36 +1,50 @@
|
|
1
1
|
require 'to_regexp'
|
2
2
|
|
3
|
+
require 'errata/erratum/delete'
|
4
|
+
require 'errata/erratum/reject'
|
5
|
+
require 'errata/erratum/replace'
|
6
|
+
require 'errata/erratum/simplify'
|
7
|
+
require 'errata/erratum/transform'
|
8
|
+
require 'errata/erratum/truncate'
|
9
|
+
|
3
10
|
class Errata
|
4
11
|
class Erratum
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
attr_reader :
|
13
|
-
attr_reader :options
|
14
|
-
|
15
|
-
def initialize(errata, options = {})
|
16
|
-
@errata = errata
|
17
|
-
@options = options.dup
|
18
|
-
end
|
12
|
+
SEMICOLON_DELIMITER = /\s*;\s*/
|
13
|
+
SPECIAL_ABBR = /\Aabbr\((.*)\)\z/
|
14
|
+
REJECT_ACTIONS = %w{reject truncate}
|
15
|
+
|
16
|
+
attr_reader :responder
|
17
|
+
attr_reader :section
|
18
|
+
attr_reader :matching_methods
|
19
|
+
attr_reader :matching_expression
|
19
20
|
|
20
|
-
def
|
21
|
-
|
21
|
+
def initialize(responder, options = {})
|
22
|
+
@responder = responder
|
23
|
+
@section = options[:section]
|
24
|
+
@matching_methods = options[:condition].split(SEMICOLON_DELIMITER).map do |method_id|
|
25
|
+
method_id.strip.gsub(/\W/, '_').downcase + '?'
|
26
|
+
end
|
27
|
+
@matching_expression = if options[:x].blank?
|
28
|
+
nil
|
29
|
+
elsif (options[:x].start_with?('/') or options[:x].start_with?('%r{')) and as_regexp = options[:x].as_regexp
|
30
|
+
::Regexp.new(*as_regexp)
|
31
|
+
elsif SPECIAL_ABBR.match options[:x]
|
32
|
+
@abbr_query = true
|
33
|
+
abbr = $1.split(/(\w\??)/).reject { |a| a == '' }.join('\.?\s?') + '\.?([^\w\.]|\z)'
|
34
|
+
expr = '(\A|\s)' + abbr
|
35
|
+
::Regexp.new expr, true
|
36
|
+
elsif REJECT_ACTIONS.include? options[:action]
|
37
|
+
expr = '\A\s*' + ::Regexp.escape(options[:x])
|
38
|
+
::Regexp.new expr, true
|
39
|
+
else
|
40
|
+
options[:x]
|
41
|
+
end
|
22
42
|
end
|
23
|
-
|
24
|
-
def
|
25
|
-
|
43
|
+
|
44
|
+
def abbr?
|
45
|
+
@abbr_query == true
|
26
46
|
end
|
27
47
|
|
28
|
-
def matching_methods
|
29
|
-
@matching_methods ||= options['condition'].split(/\s*;\s*/).map do |method_id|
|
30
|
-
"#{method_id.strip.gsub(/[^a-z0-9]/i, '_').downcase}?"
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
48
|
def targets?(row)
|
35
49
|
!!(conditions_match?(row) and expression_matches?(row))
|
36
50
|
end
|
@@ -48,25 +62,5 @@ class Errata
|
|
48
62
|
def conditions_match?(row)
|
49
63
|
matching_methods.all? { |method_id| responder.send method_id, row }
|
50
64
|
end
|
51
|
-
|
52
|
-
def matching_expression
|
53
|
-
return @matching_expression[0] if @matching_expression.is_a? ::Array
|
54
|
-
@matching_expression = []
|
55
|
-
@matching_expression[0] = if options['x'].blank?
|
56
|
-
nil
|
57
|
-
elsif (options['x'].start_with?('/') or options['x'].start_with?('%r{')) and as_regexp = options['x'].as_regexp
|
58
|
-
::Regexp.new(*as_regexp)
|
59
|
-
elsif /\Aabbr\((.*)\)\z/.match options['x']
|
60
|
-
abbr = $1.split(/(\w\??)/).reject { |a| a == '' }.join('\.?\s?') + '\.?([^\w\.]|\z)'
|
61
|
-
expr = '(\A|\s)' + abbr
|
62
|
-
::Regexp.new expr, true
|
63
|
-
elsif %w{reject truncate}.include? options['action']
|
64
|
-
expr = '\A\s*' + ::Regexp.escape(options['x'])
|
65
|
-
::Regexp.new expr, true
|
66
|
-
else
|
67
|
-
options['x']
|
68
|
-
end
|
69
|
-
@matching_expression[0]
|
70
|
-
end
|
71
65
|
end
|
72
66
|
end
|
@@ -1,9 +1,16 @@
|
|
1
1
|
class Errata
|
2
2
|
class Erratum
|
3
3
|
class Delete < Erratum
|
4
|
-
|
4
|
+
attr_reader :backfill
|
5
|
+
|
6
|
+
def initialize(responder, options = {})
|
7
|
+
super
|
5
8
|
# otherwise abbr(X) will kill the characters before and after the match
|
6
|
-
@backfill
|
9
|
+
@backfill = if abbr?
|
10
|
+
'\1\2'
|
11
|
+
else
|
12
|
+
''
|
13
|
+
end
|
7
14
|
end
|
8
15
|
|
9
16
|
def correct!(row)
|
@@ -1,8 +1,15 @@
|
|
1
1
|
class Errata
|
2
2
|
class Erratum
|
3
3
|
class Replace < Erratum
|
4
|
-
|
5
|
-
|
4
|
+
attr_reader :correction
|
5
|
+
|
6
|
+
def initialize(responder, options = {})
|
7
|
+
super
|
8
|
+
@correction = if abbr?
|
9
|
+
'\1' + options[:y].to_s + '\2'
|
10
|
+
else
|
11
|
+
options[:y].to_s
|
12
|
+
end
|
6
13
|
end
|
7
14
|
|
8
15
|
def correct!(row)
|
@@ -1,8 +1,11 @@
|
|
1
1
|
class Errata
|
2
2
|
class Erratum
|
3
3
|
class Simplify < Erratum
|
4
|
-
|
5
|
-
|
4
|
+
attr_reader :second_section
|
5
|
+
|
6
|
+
def initialize(responder, options = {})
|
7
|
+
super
|
8
|
+
@second_section = options[:x]
|
6
9
|
end
|
7
10
|
|
8
11
|
def targets?(row)
|
@@ -15,6 +18,8 @@ class Errata
|
|
15
18
|
end
|
16
19
|
end
|
17
20
|
|
21
|
+
private
|
22
|
+
|
18
23
|
def special_matcher(row)
|
19
24
|
/[\s\(\[\'\"]*#{::Regexp.escape(row[second_section])}[\s\)\]\'\"]*/
|
20
25
|
end
|
@@ -3,11 +3,14 @@ class Errata
|
|
3
3
|
class Transform < Erratum
|
4
4
|
ALLOWED_METHODS = %w{upcase downcase}
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
attr_reader :string_method
|
7
|
+
|
8
|
+
def initialize(responder, options = {})
|
9
|
+
super
|
10
|
+
@string_method = options[:y]
|
11
|
+
raise %{[errata] Method "#{@string_method}" not allowed} unless ALLOWED_METHODS.include? @string_method
|
9
12
|
end
|
10
|
-
|
13
|
+
|
11
14
|
def correct!(row)
|
12
15
|
if targets? row
|
13
16
|
row[section].gsub!(matching_expression) { |match| match.send string_method }
|
@@ -1,8 +1,11 @@
|
|
1
1
|
class Errata
|
2
2
|
class Erratum
|
3
3
|
class Truncate < Erratum
|
4
|
-
|
5
|
-
|
4
|
+
attr_reader :necessary_and_sufficient_prefix
|
5
|
+
|
6
|
+
def initialize(responder, options = {})
|
7
|
+
super
|
8
|
+
@necessary_and_sufficient_prefix = options[:x]
|
6
9
|
end
|
7
10
|
|
8
11
|
def correct!(row)
|
data/lib/errata/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'bundler'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first
|
5
|
+
require 'ruby-debug'
|
6
|
+
end
|
7
|
+
|
8
|
+
require 'minitest/spec'
|
9
|
+
require 'minitest/autorun'
|
10
|
+
require 'minitest/reporters'
|
11
|
+
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
12
|
+
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
13
|
+
|
3
14
|
unless RUBY_VERSION >= '1.9'
|
4
|
-
gem 'fastercsv'
|
5
15
|
require 'fastercsv'
|
6
16
|
end
|
7
|
-
|
8
|
-
require 'test/unit'
|
9
|
-
require 'shoulda'
|
10
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
11
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
17
|
+
|
12
18
|
require 'errata'
|
13
|
-
class Test::Unit::TestCase
|
14
|
-
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
1
|
class AutomobileVariantGuru
|
4
2
|
def transmission_is_blank?(row)
|
5
3
|
row['transmission'].blank?
|
@@ -53,42 +51,3 @@ class AutomobileVariantGuru
|
|
53
51
|
row.slice('CAR LINE', 'carline name', 'carline_name').any? { |k, v| v =~ /BENTLEY/i }
|
54
52
|
end
|
55
53
|
end
|
56
|
-
|
57
|
-
class TestOldSyntax < Test::Unit::TestCase
|
58
|
-
def setup
|
59
|
-
@e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'),
|
60
|
-
:responder => AutomobileVariantGuru.new
|
61
|
-
end
|
62
|
-
|
63
|
-
should "correct rows" do
|
64
|
-
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
65
|
-
@e.correct!(alfa)
|
66
|
-
assert_equal 'Alfa Romeo', alfa['carline_mfr_name']
|
67
|
-
end
|
68
|
-
|
69
|
-
should "reject rows" do
|
70
|
-
assert @e.rejects?('carline_mfr_name' => 'AURORA CARS')
|
71
|
-
end
|
72
|
-
|
73
|
-
should "lazily constantize and initialize responder" do
|
74
|
-
e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'),
|
75
|
-
:responder => 'AutomobileVariantGuru'
|
76
|
-
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
77
|
-
e.correct!(alfa)
|
78
|
-
assert_equal 'Alfa Romeo', alfa['carline_mfr_name']
|
79
|
-
end
|
80
|
-
|
81
|
-
should "pass options to RemoteTable if no :table is specified" do
|
82
|
-
e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg',
|
83
|
-
:responder => AutomobileVariantGuru.new
|
84
|
-
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
85
|
-
e.correct!(alfa)
|
86
|
-
assert_equal 'Alfa Romeo', alfa['carline_mfr_name']
|
87
|
-
end
|
88
|
-
|
89
|
-
should "try multiple conditions" do
|
90
|
-
bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' }
|
91
|
-
@e.correct!(bentley)
|
92
|
-
assert_equal 'Bentley', bentley['carline_mfr_name']
|
93
|
-
end
|
94
|
-
end
|
data/test/test_errata.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'models'
|
3
|
+
|
4
|
+
describe Errata do
|
5
|
+
before do
|
6
|
+
@e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg',
|
7
|
+
:responder => 'AutomobileVariantGuru'
|
8
|
+
end
|
9
|
+
|
10
|
+
it "corrects rows" do
|
11
|
+
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
12
|
+
@e.correct!(alfa)
|
13
|
+
alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
|
14
|
+
end
|
15
|
+
|
16
|
+
it "rejects rows" do
|
17
|
+
@e.rejects?('carline_mfr_name' => 'AURORA CARS').must_equal true
|
18
|
+
end
|
19
|
+
|
20
|
+
it "tries multiple conditions" do
|
21
|
+
bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' }
|
22
|
+
@e.correct!(bentley)
|
23
|
+
bentley['carline_mfr_name'].must_equal 'Bentley'
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'models'
|
3
|
+
|
4
|
+
describe 'old-style Errata usage' do
|
5
|
+
before do
|
6
|
+
@e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'),
|
7
|
+
:responder => AutomobileVariantGuru.new
|
8
|
+
end
|
9
|
+
|
10
|
+
it "corrects rows" do
|
11
|
+
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
12
|
+
@e.correct!(alfa)
|
13
|
+
alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
|
14
|
+
end
|
15
|
+
|
16
|
+
it "rejects rows" do
|
17
|
+
@e.rejects?('carline_mfr_name' => 'AURORA CARS').must_equal true
|
18
|
+
end
|
19
|
+
|
20
|
+
it "lazily constantizes and initializes responder" do
|
21
|
+
e = Errata.new :table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg'),
|
22
|
+
:responder => 'AutomobileVariantGuru'
|
23
|
+
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
24
|
+
e.correct!(alfa)
|
25
|
+
alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
|
26
|
+
end
|
27
|
+
|
28
|
+
it "passes options to RemoteTable if no :table is specified" do
|
29
|
+
e = Errata.new :url => 'http://spreadsheets.google.com/pub?key=t9WkYT39zjrStx7ruCFrZJg',
|
30
|
+
:responder => AutomobileVariantGuru.new
|
31
|
+
alfa = { "carline_mfr_name"=>"ALFA ROMEO" }
|
32
|
+
e.correct!(alfa)
|
33
|
+
alfa['carline_mfr_name'].must_equal 'Alfa Romeo'
|
34
|
+
end
|
35
|
+
|
36
|
+
it "tries multiple conditions" do
|
37
|
+
bentley = { 'carline_mfr_name' => 'ROLLS-ROYCE BENTLEY', "carline name" => 'Super Bentley' }
|
38
|
+
@e.correct!(bentley)
|
39
|
+
bentley['carline_mfr_name'].must_equal 'Bentley'
|
40
|
+
end
|
41
|
+
end
|
metadata
CHANGED
@@ -1,114 +1,78 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: errata
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 3
|
10
|
-
version: 1.0.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Seamus Abshere
|
14
9
|
- Andy Rossmeissl
|
15
10
|
autorequire:
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2012-05-03 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: activesupport
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 11
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 3
|
33
|
-
- 4
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
34
22
|
version: 2.3.4
|
35
23
|
type: :runtime
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: remote_table
|
39
24
|
prerelease: false
|
40
|
-
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 2.3.4
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: remote_table
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
41
34
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
hash: 29
|
46
|
-
segments:
|
47
|
-
- 1
|
48
|
-
- 1
|
49
|
-
- 7
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
50
38
|
version: 1.1.7
|
51
39
|
type: :runtime
|
52
|
-
version_requirements: *id002
|
53
|
-
- !ruby/object:Gem::Dependency
|
54
|
-
name: to_regexp
|
55
40
|
prerelease: false
|
56
|
-
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
42
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.1.7
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: to_regexp
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
66
54
|
version: 0.0.2
|
67
55
|
type: :runtime
|
68
|
-
version_requirements: *id003
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: test-unit
|
71
56
|
prerelease: false
|
72
|
-
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
58
|
none: false
|
74
|
-
requirements:
|
75
|
-
- -
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
segments:
|
79
|
-
- 0
|
80
|
-
version: "0"
|
81
|
-
type: :development
|
82
|
-
version_requirements: *id004
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: shoulda
|
85
|
-
prerelease: false
|
86
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
87
|
-
none: false
|
88
|
-
requirements:
|
89
|
-
- - ">="
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
hash: 3
|
92
|
-
segments:
|
93
|
-
- 0
|
94
|
-
version: "0"
|
95
|
-
type: :development
|
96
|
-
version_requirements: *id005
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 0.0.2
|
97
63
|
description: Correct strings based on remote errata files.
|
98
|
-
email:
|
64
|
+
email:
|
99
65
|
- seamus@abshere.net
|
100
66
|
executables: []
|
101
|
-
|
102
67
|
extensions: []
|
103
|
-
|
104
68
|
extra_rdoc_files: []
|
105
|
-
|
106
|
-
files:
|
69
|
+
files:
|
107
70
|
- .document
|
108
71
|
- .gitignore
|
72
|
+
- CHANGELOG
|
109
73
|
- Gemfile
|
110
74
|
- LICENSE
|
111
|
-
- README.
|
75
|
+
- README.md
|
112
76
|
- Rakefile
|
113
77
|
- errata.gemspec
|
114
78
|
- lib/errata.rb
|
@@ -121,40 +85,36 @@ files:
|
|
121
85
|
- lib/errata/erratum/truncate.rb
|
122
86
|
- lib/errata/version.rb
|
123
87
|
- test/helper.rb
|
124
|
-
- test/
|
88
|
+
- test/models.rb
|
89
|
+
- test/test_errata.rb
|
90
|
+
- test/test_old_style.rb
|
125
91
|
homepage: https://github.com/seamusabshere/errata
|
126
92
|
licenses: []
|
127
|
-
|
128
93
|
post_install_message:
|
129
94
|
rdoc_options: []
|
130
|
-
|
131
|
-
require_paths:
|
95
|
+
require_paths:
|
132
96
|
- lib
|
133
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
98
|
none: false
|
135
|
-
requirements:
|
136
|
-
- -
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
|
139
|
-
|
140
|
-
- 0
|
141
|
-
version: "0"
|
142
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
143
104
|
none: false
|
144
|
-
requirements:
|
145
|
-
- -
|
146
|
-
- !ruby/object:Gem::Version
|
147
|
-
|
148
|
-
segments:
|
149
|
-
- 0
|
150
|
-
version: "0"
|
105
|
+
requirements:
|
106
|
+
- - ! '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
151
109
|
requirements: []
|
152
|
-
|
153
110
|
rubyforge_project: errata
|
154
|
-
rubygems_version: 1.
|
111
|
+
rubygems_version: 1.8.21
|
155
112
|
signing_key:
|
156
113
|
specification_version: 3
|
157
114
|
summary: Correct strings based on remote errata files
|
158
|
-
test_files:
|
115
|
+
test_files:
|
159
116
|
- test/helper.rb
|
160
|
-
- test/
|
117
|
+
- test/models.rb
|
118
|
+
- test/test_errata.rb
|
119
|
+
- test/test_old_style.rb
|
120
|
+
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
=errata
|
2
|
-
|
3
|
-
Correct strings based on remote errata files.
|
4
|
-
|
5
|
-
==UTF-8
|
6
|
-
|
7
|
-
Assumes all input strings are UTF-8. Otherwise there can be problems with Ruby 1.9 and Regexp::FIXEDENCODING. Specifically, ASCII-8BIT regexps might be applied to UTF-8 strings (or vice-versa), resulting in Encoding::CompatibilityError.
|
8
|
-
|
9
|
-
==Real-life usage
|
10
|
-
|
11
|
-
Used by data_miner (http://github.com/seamusabshere/data_miner)
|
12
|
-
|
13
|
-
==Example
|
14
|
-
|
15
|
-
Taken from <tt>#{GEMDIR}/test/test_old_syntax.rb</tt>:
|
16
|
-
|
17
|
-
errata = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv')
|
18
|
-
rover = { 'manufacturer_name' => 'foobar Austin Rover foobar' }
|
19
|
-
mercedes = { 'manufacturer_name' => 'MERCEDES' }
|
20
|
-
errata.correct! mercedes
|
21
|
-
errata.correct! rover
|
22
|
-
|
23
|
-
Now you will have
|
24
|
-
|
25
|
-
rover['manufacturer_name'] #=> 'Rover' (used to be 'foobar Austin Rover foobar')
|
26
|
-
mercedes['manufacturer_name'] #=> 'Mercedes-Benz' (used to be 'MERCEDES')
|
27
|
-
|
28
|
-
==Authors
|
29
|
-
|
30
|
-
* Seamus Abshere <seamus@abshere.net>
|
31
|
-
* Andy Rossmeissl <andy@rossmeissl.net>
|
32
|
-
|
33
|
-
==Copyright
|
34
|
-
|
35
|
-
Copyright (c) 2011 Brighter Planet. See LICENSE for details.
|