errata 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ =errata
2
+
3
+ Correct strings based on remote errata files.
4
+
5
+ ==Real-life usage
6
+
7
+ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
+
9
+ ==Example
10
+
11
+ Taken from <tt>#{GEMDIR}/test/errata_test.rb</tt>:
12
+
13
+ errata = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv')
14
+ rover = { 'manufacturer_name' => 'foobar Austin Rover foobar' }
15
+ mercedes = { 'manufacturer_name' => 'MERCEDES' }
16
+ errata.correct!(mercedes)
17
+ errata.correct!(rover)
18
+
19
+ Now you will have
20
+
21
+ rover['manufacturer_name'] #=> 'Rover' (used to be 'foobar Austin Rover foobar')
22
+ mercedes['manufacturer_name'] #=> 'Mercedes-Benz' (used to be 'MERCEDES')
23
+
24
+ ==Authors
25
+
26
+ * Seamus Abshere <seamus@abshere.net>
27
+ * Andy Rossmeissl <andy@rossmeissl.net>
28
+
29
+ ==Copyright
30
+
31
+ Copyright (c) 2009 Brighter Planet. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,65 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "errata"
8
+ gem.summary = %Q{Correct strings based on remote errata files}
9
+ gem.description = %Q{Correct strings based on remote errata files}
10
+ gem.email = "seamus@abshere.net"
11
+ gem.homepage = "http://github.com/seamusabshere/errata"
12
+ gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
+ %w{ activesupport remote_table }.each { |name| gem.add_dependency name }
14
+ gem.require_path = "lib"
15
+ gem.files.include %w(lib/erratum) unless gem.files.empty? # seems to fail once it's in the wild
16
+ gem.rdoc_options << '--line-numbers' << '--inline-source'
17
+ gem.rubyforge_project = "errata"
18
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
19
+ end
20
+ Jeweler::GemcutterTasks.new
21
+ Jeweler::RubyforgeTasks.new do |rubyforge|
22
+ rubyforge.doc_task = "rdoc"
23
+ end
24
+ rescue LoadError
25
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
26
+ end
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/*_test.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ begin
36
+ require 'rcov/rcovtask'
37
+ Rcov::RcovTask.new do |test|
38
+ test.libs << 'test'
39
+ test.pattern = 'test/**/*_test.rb'
40
+ test.verbose = true
41
+ end
42
+ rescue LoadError
43
+ task :rcov do
44
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
45
+ end
46
+ end
47
+
48
+
49
+
50
+
51
+ task :default => :test
52
+
53
# Build the rdoc task. rake/rdoctask was deprecated and later removed from
# Rake in favour of RDoc's own task, so prefer rdoc/task and only fall back
# to the legacy file when an old toolchain does not ship it.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # VERSION ends with a newline; strip it so it does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ''

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "errata #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.4
data/errata.gemspec ADDED
@@ -0,0 +1,65 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{errata}
8
+ s.version = "0.1.4"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
+ s.date = %q{2009-11-02}
13
+ s.description = %q{Correct strings based on remote errata files}
14
+ s.email = %q{seamus@abshere.net}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "errata.gemspec",
27
+ "lib/errata.rb",
28
+ "lib/erratum.rb",
29
+ "lib/erratum/delete.rb",
30
+ "lib/erratum/reject.rb",
31
+ "lib/erratum/replace.rb",
32
+ "lib/erratum/simplify.rb",
33
+ "lib/erratum/transform.rb",
34
+ "lib/erratum/truncate.rb",
35
+ "test/errata_test.rb",
36
+ "test/test_helper.rb"
37
+ ]
38
+ s.homepage = %q{http://github.com/seamusabshere/errata}
39
+ s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
40
+ s.require_paths = ["lib"]
41
+ s.rubyforge_project = %q{errata}
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{Correct strings based on remote errata files}
44
+ s.test_files = [
45
+ "test/errata_test.rb",
46
+ "test/test_helper.rb"
47
+ ]
48
+
49
+ if s.respond_to? :specification_version then
50
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
51
+ s.specification_version = 3
52
+
53
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
54
+ s.add_runtime_dependency(%q<activesupport>, [">= 0"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0"])
56
+ else
57
+ s.add_dependency(%q<activesupport>, [">= 0"])
58
+ s.add_dependency(%q<remote_table>, [">= 0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<activesupport>, [">= 0"])
62
+ s.add_dependency(%q<remote_table>, [">= 0"])
63
+ end
64
+ end
65
+
data/lib/errata.rb ADDED
@@ -0,0 +1,42 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'remote_table'
4
+ require 'erratum'
5
+ require 'erratum/delete'
6
+ require 'erratum/reject'
7
+ require 'erratum/replace'
8
+ require 'erratum/simplify'
9
+ require 'erratum/transform'
10
+ require 'erratum/truncate'
11
+
12
# A set of errata read from a RemoteTable. Rows whose :action is 'reject'
# become rejections; rows whose :action is one of delete/replace/simplify/
# transform/truncate become corrections. Rows with any other action are ignored.
class Errata
  attr_reader :klass

  # options[:klass] is stored and exposed through #klass.
  # options[:url] is handed to RemoteTable.new as :url.
  def initialize(options = {})
    @klass = options[:klass]
    @_table = RemoteTable.new(:url => options[:url])
  end

  # True when at least one rejection erratum targets the row.
  def rejects?(row)
    rejections.any? { |rejection| rejection.targets?(row) }
  end

  # Runs every correction erratum against the row, in table order.
  # Always returns nil.
  def correct!(row)
    corrections.each { |correction| correction.correct!(row) }
    nil
  end

  # The distinct, non-nil matching methods named by all errata.
  def implied_matching_methods
    every_erratum = rejections + corrections
    every_erratum.map { |erratum| erratum.matching_method }.compact.uniq
  end

  private

  # Memoized list of Reject errata.
  def rejections
    return @_rejections if @_rejections
    built = @_table.rows.map do |entry|
      ::Errata::Erratum::Reject.new(self, entry) if entry[:action] == 'reject'
    end
    @_rejections = built.compact
  end

  # Memoized list of correcting errata; the class is looked up from the
  # row's action, which must be in the literal whitelist below.
  def corrections
    return @_corrections if @_corrections
    built = @_table.rows.map do |entry|
      if %w{delete replace simplify transform truncate}.include?(entry[:action])
        "::Errata::Erratum::#{entry[:action].camelcase}".constantize.new(self, entry)
      end
    end
    @_corrections = built.compact
  end
end
data/lib/erratum.rb ADDED
@@ -0,0 +1,75 @@
1
class Errata
  # Base class for one row of an errata table.
  #
  # Reads :section (the column to act on) and :condition (turned into a
  # predicate method name that is sent to the owning Errata's klass) from
  # the options. Subclasses are expected to provide #matching_expression,
  # normally by calling #set_matching_expression.
  class Erratum
    attr_accessor :errata, :column, :matching_method

    # errata  - the owning Errata
    # options - the table row; :prefix is reserved for subclasses and
    #           raises if it is present in the incoming row
    def initialize(errata, options = {})
      raise "you can't set this from outside" if options.key?(:prefix)
      @errata = errata
      @column = options[:section]
      # A blank condition means "no matching method" rather than the bogus :"?".
      unless options[:condition].blank?
        @matching_method = "#{options[:condition].gsub(/[^a-z0-9]/i, '_').downcase}?".to_sym
      end
    end

    # The class whose predicate methods are used for matching; it belongs
    # to the owning Errata.
    def klass
      errata.klass
    end

    # Deliberately left without a closing '>' -- subclasses append their own
    # attributes and close it. klass is optional, so guard against nil.
    def inspect
      "<#{self.class.name}:#{object_id} klass=#{klass && klass.name} column=#{column} matching_method=#{matching_method}"
    end

    # true only if both the matching method (if any) and the matching
    # expression (if any) accept the row.
    def targets?(row)
      !!(method_matches?(row) && expression_matches?(row))
    end

    # Returns :skipped without yielding when the row is not targeted,
    # otherwise yields (if a block was given) and returns :corrected.
    def correct!(row, &block)
      return :skipped unless targets?(row)
      yield if block_given?
      :corrected
    end

    private

    # A blank expression or blank column matches everything. A Regexp is
    # matched against the stringified value; a String is tested by inclusion.
    def expression_matches?(row)
      return true if matching_expression.blank? || column.blank?
      if matching_expression.is_a?(Regexp)
        matching_expression.match(row[column].to_s)
      else
        row[column].to_s.include?(matching_expression)
      end
    end

    # No matching method means every row matches; otherwise ask klass.
    def method_matches?(row)
      return true if matching_method.nil?
      klass.send(matching_method, row)
    end

    # Derives @matching_expression from options[:x]:
    #   blank                -> nil
    #   "/.../" or "/.../i"  -> Regexp (case-insensitive with the i flag)
    #   "abbr(...)"          -> case-insensitive Regexp tolerating dots and
    #                           spaces between the letters
    #   options[:prefix]     -> case-insensitive Regexp anchored at the start
    #   anything else        -> the plain string
    # The options hash is never modified.
    def set_matching_expression(options = {})
      pattern = options[:x]
      @matching_expression =
        if pattern.blank?
          nil
        elsif pattern.start_with?('/')
          # Only a trailing "/i" is a flag; work on a copy so the caller's
          # row keeps its original pattern.
          ignore_case = pattern.end_with?('/i')
          literal = ignore_case ? pattern.chop : pattern
          Regexp.new(literal.gsub(/\A\/|\/\z/, ''), ignore_case ? Regexp::IGNORECASE : 0)
        elsif (abbreviation = /\Aabbr\((.*)\)\z/.match(pattern))
          letters = abbreviation[1].split(/(\w\??)/).reject { |piece| piece == '' }
          Regexp.new('(\A|\s)' + letters.join('\.?\s?') + '\.?([^\w\.]|\z)', Regexp::IGNORECASE)
        elsif options[:prefix] == true
          Regexp.new('\A\s*' + Regexp.escape(pattern), Regexp::IGNORECASE)
        else
          pattern
        end
    end
  end
end
@@ -0,0 +1,24 @@
1
class Errata
  class Erratum
    # Erratum that strips every occurrence of the matching expression
    # from the targeted column, in place.
    class Delete < Erratum
      attr_accessor :matching_expression, :backfill

      def initialize(errata, options = {})
        super
        set_matching_expression(options)
        # otherwise abbr(X) will kill the characters before and after the match
        abbreviated = /\Aabbr\((.*)\)\z/.match(options[:x])
        @backfill = abbreviated ? '\1\2' : ''
      end

      def inspect
        "#{super} matching_expression=#{matching_expression}>"
      end

      # Substitutes the backfill for each match when the row is targeted.
      def correct!(row)
        super(row) { row[column].gsub!(matching_expression, backfill) }
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
class Errata
  class Erratum
    # Erratum that marks rows for rejection. It only ever answers
    # #targets?; it never corrects anything.
    class Reject < Erratum
      attr_accessor :matching_expression

      def initialize(errata, options = {})
        super
        # Merged into a copy, so the incoming row does not gain a :prefix key.
        set_matching_expression(options.merge(:prefix => true))
      end

      # Closes the '<...' opened by Erratum#inspect, like the sibling classes.
      def inspect
        super + " matching_expression=#{matching_expression}>"
      end

      # Accepts (and ignores) the row so the call signature matches the other
      # errata; always raises because a rejection cannot correct.
      def correct!(row = nil)
        raise "rejections don't correct"
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
class Errata
  class Erratum
    # Erratum that swaps the matched text (or, with no matching expression,
    # the whole column value) for the correction taken from options[:y].
    class Replace < Erratum
      attr_accessor :matching_expression, :correction

      def initialize(errata, options = {})
        super
        set_matching_expression(options)
        replacement = options[:y].to_s
        # For abbr(...) patterns, wrap the replacement in the two captured groups.
        abbreviated = /\Aabbr\((.*)\)\z/.match(options[:x])
        @correction = abbreviated ? '\1' + replacement + '\2' : replacement
      end

      def inspect
        "#{super} matching_expression=#{matching_expression} correction=#{correction}>"
      end

      def correct!(row)
        super(row) do
          unless matching_expression.blank?
            row[column].gsub!(matching_expression, correction)
          else
            row[column] = correction
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
class Errata
  class Erratum
    # Erratum that removes the value of a second column (options[:x]) from
    # the targeted column, together with any adjacent whitespace, quotes,
    # parentheses or square brackets.
    class Simplify < Erratum
      attr_accessor :second_column

      def initialize(errata, options = {})
        super
        @second_column = options[:x]
      end

      def inspect
        super + " second_column=#{second_column}>"
      end

      # true when both columns are present, the matching method (if any)
      # accepts the row, and the second column's value occurs in the first.
      # Always returns a strict boolean, like Erratum#targets?.
      def targets?(row)
        return false if row[column].blank? || row[second_column].blank?
        !!(method_matches?(row) && matching_expression(row).match(row[column]))
      end

      def correct!(row)
        super(row) do
          row[column].gsub!(matching_expression(row), '')
        end
      end

      private

      # Builds the pattern for this row and caches it under the second
      # column's value, so each distinct value is compiled once.
      def matching_expression(row)
        @_matching_expressions ||= {}
        @_matching_expressions[row[second_column]] ||= /[\s\(\[\'\"]*#{Regexp.escape(row[second_column])}[\s\)\]\'\"]*/
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
class Errata
  class Erratum
    # Erratum that applies a whitelisted String method (options[:y]) to the
    # text matched by the matching expression.
    class Transform < Erratum
      # Frozen so the whitelist guarding #send cannot be altered at runtime.
      ALLOWED_METHODS = %w{upcase downcase}.freeze
      attr_accessor :matching_expression, :string_method

      # Raises RuntimeError when options[:y] is not in ALLOWED_METHODS.
      def initialize(errata, options = {})
        super
        set_matching_expression(options)
        @string_method = options[:y]
        raise "string method (#{@string_method}) needs to be in (#{ALLOWED_METHODS.join(', ')})" unless ALLOWED_METHODS.include?(@string_method)
      end

      def inspect
        super + " matching_expression=#{matching_expression} string_method=#{string_method}>"
      end

      # With a matching expression only the matched text is transformed.
      # Without one the whole value is transformed (gsub! cannot take nil),
      # which mirrors how Replace treats a blank expression. A nil value is
      # left untouched.
      def correct!(row)
        super(row) do
          if matching_expression.blank?
            row[column] = row[column].to_s.send(string_method) unless row[column].nil?
          else
            row[column].gsub!(matching_expression) { |match| match.send(string_method) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
class Errata
  class Erratum
    # Erratum that replaces a column value with a given prefix (options[:x])
    # whenever the row is targeted; the matching expression is built from
    # that same prefix with the :prefix option set.
    class Truncate < Erratum
      attr_accessor :matching_expression, :necessary_and_sufficient_prefix

      # Raises RuntimeError when options[:x] is blank.
      def initialize(errata, options = {})
        super
        prefix = options[:x]
        @necessary_and_sufficient_prefix = prefix
        raise "necessary_and_sufficient_prefix cannot be blank" if prefix.blank?
        set_matching_expression(options.merge(:prefix => true))
      end

      def inspect
        "#{super} matching_expression=#{matching_expression} necessary_and_sufficient_prefix=#{necessary_and_sufficient_prefix}>"
      end

      def correct!(row)
        super(row) { row[column] = necessary_and_sufficient_prefix }
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'test_helper'
2
+
3
# Test fixture class exposing a row predicate of the kind an erratum
# condition can name.
class Brewery
  class << self
    # True when the row's :volume is below 500.
    def is_microbrew?(row)
      volume = row[:volume]
      volume < 500
    end
  end
end
8
+
9
# Exercises Errata against the published automobile errata file.
# The two Brewery tests are placeholders that fail on purpose.
class ErrataTest < Test::Unit::TestCase
  def setup
    @e = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv')
  end

  should "return implied matching methods" do
    flunk "create a fake errata for Brewery"
  end

  should "use matching methods" do
    flunk "create a fake errata for Brewery"
  end

  should "correct rows" do
    rover = { 'manufacturer_name' => 'foobar Austin Rover foobar' }
    mercedes = { 'manufacturer_name' => 'MERCEDES' }
    # Same order as before: mercedes first, then rover.
    [mercedes, rover].each { |row| @e.correct!(row) }
    assert_equal 'Mercedes-Benz', mercedes['manufacturer_name']
    assert_equal 'Rover', rover['manufacturer_name']
  end

  should "reject rows" do
    aurora = { 'manufacturer_name' => 'AURORA CARS' }
    assert @e.rejects?(aurora)
  end
end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'errata'
8
+
9
+ class Test::Unit::TestCase
10
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: errata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Seamus Abshere
8
+ - Andy Rossmeissl
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-02 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: remote_table
28
+ type: :runtime
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ version:
36
+ description: Correct strings based on remote errata files
37
+ email: seamus@abshere.net
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - LICENSE
44
+ - README.rdoc
45
+ files:
46
+ - .document
47
+ - .gitignore
48
+ - LICENSE
49
+ - README.rdoc
50
+ - Rakefile
51
+ - VERSION
52
+ - errata.gemspec
53
+ - lib/errata.rb
54
+ - lib/erratum.rb
55
+ - lib/erratum/delete.rb
56
+ - lib/erratum/reject.rb
57
+ - lib/erratum/replace.rb
58
+ - lib/erratum/simplify.rb
59
+ - lib/erratum/transform.rb
60
+ - lib/erratum/truncate.rb
61
+ - test/errata_test.rb
62
+ - test/test_helper.rb
63
+ has_rdoc: true
64
+ homepage: http://github.com/seamusabshere/errata
65
+ licenses: []
66
+
67
+ post_install_message:
68
+ rdoc_options:
69
+ - --charset=UTF-8
70
+ - --line-numbers
71
+ - --inline-source
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ version:
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: "0"
85
+ version:
86
+ requirements: []
87
+
88
+ rubyforge_project: errata
89
+ rubygems_version: 1.3.5
90
+ signing_key:
91
+ specification_version: 3
92
+ summary: Correct strings based on remote errata files
93
+ test_files:
94
+ - test/errata_test.rb
95
+ - test/test_helper.rb