errata 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ =errata
2
+
3
+ Correct strings based on remote errata files.
4
+
5
+ ==Real-life usage
6
+
7
+ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
+
9
+ ==Example
10
+
11
+ Taken from <tt>#{GEMDIR}/test/errata_test.rb</tt>:
12
+
13
+ errata = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv')
14
+ rover = { 'manufacturer_name' => 'foobar Austin Rover foobar' }
15
+ mercedes = { 'manufacturer_name' => 'MERCEDES' }
16
+ errata.correct!(mercedes)
17
+ errata.correct!(rover)
18
+
19
+ Now you will have
20
+
21
+ rover['manufacturer_name'] #=> 'Rover' (used to be 'foobar Austin Rover foobar')
22
+ mercedes['manufacturer_name'] #=> 'Mercedes-Benz' (used to be 'MERCEDES')
23
+
24
+ ==Authors
25
+
26
+ * Seamus Abshere <seamus@abshere.net>
27
+ * Andy Rossmeissl <andy@rossmeissl.net>
28
+
29
+ ==Copyright
30
+
31
+ Copyright (c) 2009 Brighter Planet. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,65 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin # Gem packaging tasks; the rescue below prints a hint when jeweler cannot be loaded.
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "errata"
8
+ gem.summary = %Q{Correct strings based on remote errata files}
9
+ gem.description = %Q{Correct strings based on remote errata files}
10
+ gem.email = "seamus@abshere.net"
11
+ gem.homepage = "http://github.com/seamusabshere/errata"
12
+ gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
+ %w{ activesupport remote_table }.each { |name| gem.add_dependency name } # dependencies are declared without version constraints
14
+ gem.require_path = "lib"
15
+ gem.files.include %w(lib/erratum) unless gem.files.empty? # seems to fail once it's in the wild
16
+ gem.rdoc_options << '--line-numbers' << '--inline-source'
17
+ gem.rubyforge_project = "errata"
18
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
19
+ end
20
+ Jeweler::GemcutterTasks.new
21
+ Jeweler::RubyforgeTasks.new do |rubyforge|
22
+ rubyforge.doc_task = "rdoc"
23
+ end
24
+ rescue LoadError # any LoadError raised in the block above lands here
25
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
26
+ end
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test| # defines the :test task (also the default task, see below)
30
+ test.libs << 'lib' << 'test' # both directories are added to the test task's libs list
31
+ test.pattern = 'test/**/*_test.rb' # test files are selected by the _test.rb suffix
32
+ test.verbose = true
33
+ end
34
+
35
+ begin # Coverage task; replaced by an aborting stub when rcov cannot be loaded.
36
+ require 'rcov/rcovtask'
37
+ Rcov::RcovTask.new do |test|
38
+ test.libs << 'test'
39
+ test.pattern = 'test/**/*_test.rb'
40
+ test.verbose = true
41
+ end
42
+ rescue LoadError
43
+ task :rcov do # fallback :rcov task that only reports how to install rcov
44
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
45
+ end
46
+ end
47
+
48
+
49
+
50
+
51
+ task :default => :test
52
+
53
+ require 'rake/rdoctask'
54
+ Rake::RDocTask.new do |rdoc| # API documentation task; output goes to the rdoc directory
55
+ if File.exist?('VERSION') # the title carries the contents of the VERSION file when it exists
56
+ version = File.read('VERSION')
57
+ else
58
+ version = "" # no VERSION file: the title is just "errata "
59
+ end
60
+
61
+ rdoc.rdoc_dir = 'rdoc'
62
+ rdoc.title = "errata #{version}"
63
+ rdoc.rdoc_files.include('README*')
64
+ rdoc.rdoc_files.include('lib/**/*.rb')
65
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.4
data/errata.gemspec ADDED
@@ -0,0 +1,65 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{errata}
8
+ s.version = "0.1.4"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
+ s.date = %q{2009-11-02}
13
+ s.description = %q{Correct strings based on remote errata files}
14
+ s.email = %q{seamus@abshere.net}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "errata.gemspec",
27
+ "lib/errata.rb",
28
+ "lib/erratum.rb",
29
+ "lib/erratum/delete.rb",
30
+ "lib/erratum/reject.rb",
31
+ "lib/erratum/replace.rb",
32
+ "lib/erratum/simplify.rb",
33
+ "lib/erratum/transform.rb",
34
+ "lib/erratum/truncate.rb",
35
+ "test/errata_test.rb",
36
+ "test/test_helper.rb"
37
+ ]
38
+ s.homepage = %q{http://github.com/seamusabshere/errata}
39
+ s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
40
+ s.require_paths = ["lib"]
41
+ s.rubyforge_project = %q{errata}
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{Correct strings based on remote errata files}
44
+ s.test_files = [
45
+ "test/errata_test.rb",
46
+ "test/test_helper.rb"
47
+ ]
48
+
49
+ if s.respond_to? :specification_version then
50
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
51
+ s.specification_version = 3
52
+
53
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
54
+ s.add_runtime_dependency(%q<activesupport>, [">= 0"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0"])
56
+ else
57
+ s.add_dependency(%q<activesupport>, [">= 0"])
58
+ s.add_dependency(%q<remote_table>, [">= 0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<activesupport>, [">= 0"])
62
+ s.add_dependency(%q<remote_table>, [">= 0"])
63
+ end
64
+ end
65
+
data/lib/errata.rb ADDED
@@ -0,0 +1,42 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'remote_table'
4
+ require 'erratum'
5
+ require 'erratum/delete'
6
+ require 'erratum/reject'
7
+ require 'erratum/replace'
8
+ require 'erratum/simplify'
9
+ require 'erratum/transform'
10
+ require 'erratum/truncate'
11
+
12
+ class Errata # Reads erratum rows from a RemoteTable and applies them to row hashes.
13
+ attr_reader :klass # receiver of matching-method calls made by individual errata; nil unless :klass was given
14
+
15
+ def initialize(options = {}) # options: :klass (optional) and :url (handed to RemoteTable.new)
16
+ @klass = options[:klass]
17
+ @_table = RemoteTable.new(:url => options[:url])
18
+ end
19
+
20
+ def rejects?(row) # true when at least one 'reject' erratum targets the row
21
+ rejections.any? { |erratum| erratum.targets?(row) }
22
+ end
23
+
24
+ def correct!(row) # runs every correction erratum against the row; always returns nil
25
+ corrections.each { |erratum| erratum.correct!(row) }
26
+ nil
27
+ end
28
+
29
+ def implied_matching_methods # distinct non-nil matching_method symbols across all rejections and corrections
30
+ (rejections + corrections).map { |erratum| erratum.matching_method }.compact.uniq
31
+ end
32
+
33
+ private
34
+
35
+ def rejections # memoized: one Reject per table row whose :action is 'reject'
36
+ @_rejections ||= @_table.rows.map { |erratum| ::Errata::Erratum::Reject.new(self, erratum) if erratum[:action] == 'reject' }.compact
37
+ end
38
+
39
+ def corrections # memoized: rows whose :action is delete/replace/simplify/transform/truncate, built as the class named by the camelcased action
40
+ @_corrections ||= @_table.rows.map { |erratum| "::Errata::Erratum::#{erratum[:action].camelcase}".constantize.new(self, erratum) if %w{delete replace simplify transform truncate}.include?(erratum[:action]) }.compact
41
+ end
42
+ end
data/lib/erratum.rb ADDED
@@ -0,0 +1,75 @@
1
+ class Errata
2
+ class Erratum
3
+ attr_accessor :errata, :column, :matching_method
4
+ delegate :klass, :to => :errata
5
+
6
+ def initialize(errata, options = {})
7
+ raise "you can't set this from outside" if options.has_key?(:prefix)
8
+ @errata = errata
9
+ @column = options[:section]
10
+ @matching_method = "#{options[:condition].gsub(/[^a-z0-9]/i, '_').downcase}?".to_sym if options[:condition]
11
+ end
12
+
13
+ def inspect
14
+ "<#{self.class.name}:#{object_id} klass=#{klass.name} column=#{column} matching_method=#{matching_method}"
15
+ end
16
+
17
+ def targets?(row)
18
+ !!(method_matches?(row) and expression_matches?(row))
19
+ end
20
+
21
+ def correct!(row, &block)
22
+ return :skipped unless targets?(row)
23
+ # old_value = row[column].to_s.dup
24
+ yield if block_given?
25
+ # unless name.demodulize.underscore == 'truncate' or name.demodulize.underscore == 'simplify'
26
+ # puts "-" * 64
27
+ # puts inspect
28
+ # puts row.inspect
29
+ # if row[column] != old_value
30
+ # puts "#{old_value} -> #{row[column]}"
31
+ # else
32
+ # puts "no change"
33
+ # end
34
+ # puts
35
+ # end
36
+ :corrected
37
+ end
38
+
39
+ private
40
+
41
+ def expression_matches?(row)
42
+ return true if matching_expression.blank? or column.blank?
43
+ if matching_expression.is_a?(Regexp)
44
+ matching_expression.match(row[column].to_s)
45
+ else
46
+ row[column].to_s.include?(matching_expression)
47
+ end
48
+ end
49
+
50
+ def method_matches?(row)
51
+ return true if matching_method.nil?
52
+ klass.send(matching_method, row)
53
+ end
54
+
55
+ def set_matching_expression(options = {})
56
+ if options[:x].blank?
57
+ @matching_expression = nil
58
+ elsif options[:x].starts_with?('/')
59
+ if options[:x].ends_with?('i')
60
+ ci = true
61
+ options[:x] = options[:x].chop
62
+ else
63
+ ci = false
64
+ end
65
+ @matching_expression = Regexp.new(options[:x].gsub(/\A\/|\/\z/, ''), ci)
66
+ elsif /\Aabbr\((.*)\)\z/.match(options[:x])
67
+ @matching_expression = Regexp.new('(\A|\s)' + $1.split(/(\w\??)/).reject { |a| a == '' }.join('\.?\s?') + '\.?([^\w\.]|\z)', true)
68
+ elsif options[:prefix] == true
69
+ @matching_expression = Regexp.new('\A\s*' + Regexp.escape(options[:x]), true)
70
+ else
71
+ @matching_expression = options[:x]
72
+ end
73
+ end
74
+ end
75
+ end
data/lib/erratum/delete.rb ADDED
@@ -0,0 +1,24 @@
1
+ class Errata
2
+ class Erratum
3
+ class Delete < Erratum # Removes every match of the matching expression from the targeted column.
4
+ attr_accessor :matching_expression, :backfill
5
+
6
+ def initialize(errata, options = {}) # options[:x] is the expression to delete
7
+ super
8
+ set_matching_expression(options)
9
+ # otherwise abbr(X) will kill the characters before and after the match
10
+ @backfill = /\Aabbr\((.*)\)\z/.match(options[:x]) ? '\1\2' : ''
11
+ end
12
+
13
+ def inspect
14
+ super + " matching_expression=#{matching_expression}>"
15
+ end
16
+
17
+ def correct!(row) # edits row[column] in place with gsub!; the base method skips rows that are not targeted
18
+ super(row) do
19
+ row[column].gsub!(matching_expression, backfill)
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
data/lib/erratum/reject.rb ADDED
@@ -0,0 +1,20 @@
1
+ class Errata
2
+ class Erratum
3
+ class Reject < Erratum
4
+ attr_accessor :matching_expression
5
+
6
+ def initialize(errata, options = {})
7
+ super
8
+ set_matching_expression(options.merge(:prefix => true))
9
+ end
10
+
11
+ def inspect
12
+ super + " matching_expression=#{matching_expression}"
13
+ end
14
+
15
+ def correct!
16
+ raise "rejections don't correct"
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/erratum/replace.rb ADDED
@@ -0,0 +1,27 @@
1
+ class Errata
2
+ class Erratum
3
+ class Replace < Erratum # Substitutes the correction for each match, or for the whole value when there is no expression.
4
+ attr_accessor :matching_expression, :correction
5
+
6
+ def initialize(errata, options = {}) # options[:x] is the expression to find, options[:y] the replacement text
7
+ super
8
+ set_matching_expression(options)
9
+ @correction = /\Aabbr\((.*)\)\z/.match(options[:x]) ? '\1' + options[:y].to_s + '\2' : options[:y].to_s
10
+ end
11
+
12
+ def inspect
13
+ super + " matching_expression=#{matching_expression} correction=#{correction}>"
14
+ end
15
+
16
+ def correct!(row) # the base method skips rows that are not targeted
17
+ super(row) do
18
+ if matching_expression.blank? # no expression: overwrite the column value entirely
19
+ row[column] = correction
20
+ else
21
+ row[column].gsub!(matching_expression, correction) # in-place replacement of every match
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
data/lib/erratum/simplify.rb ADDED
@@ -0,0 +1,33 @@
1
+ class Errata
2
+ class Erratum
3
+ class Simplify < Erratum # Strips the value of a second column out of the targeted column.
4
+ attr_accessor :second_column
5
+
6
+ def initialize(errata, options = {}) # options[:x] names the second column
7
+ super
8
+ @second_column = options[:x]
9
+ end
10
+
11
+ def inspect
12
+ super + " second_column=#{second_column}>"
13
+ end
14
+
15
+ def targets?(row) # overrides the base check; the result is truthy/falsy rather than a strict boolean
16
+ !row[column].blank? and !row[second_column].blank? and method_matches?(row) and matching_expression(row).match(row[column])
17
+ end
18
+
19
+ def correct!(row) # removes each occurrence in place; the base method skips rows that are not targeted
20
+ super(row) do
21
+ row[column].gsub!(matching_expression(row), '')
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def matching_expression(row) # per-row pattern: the escaped second-column value plus any adjacent whitespace, brackets or quotes
28
+ @_matching_expressions ||= {} # compiled patterns are cached by second-column value
29
+ @_matching_expressions[row[second_column]] ||= /[\s\(\[\'\"]*#{Regexp.escape(row[second_column])}[\s\)\]\'\"]*/
30
+ end
31
+ end
32
+ end
33
+ end
data/lib/erratum/transform.rb ADDED
@@ -0,0 +1,25 @@
1
+ class Errata
2
+ class Erratum
3
+ class Transform < Erratum # Applies a whitelisted String method to each match in the targeted column.
4
+ ALLOWED_METHODS = %w{upcase downcase} # the only method names accepted for options[:y]
5
+ attr_accessor :matching_expression, :string_method
6
+
7
+ def initialize(errata, options = {}) # options[:x] is the expression, options[:y] the method name; raises unless it is whitelisted
8
+ super
9
+ set_matching_expression(options)
10
+ @string_method = options[:y]
11
+ raise "string method (#{@string_method}) needs to be in (#{ALLOWED_METHODS.join(', ')})" unless ALLOWED_METHODS.include?(@string_method)
12
+ end
13
+
14
+ def inspect
15
+ super + " matching_expression=#{matching_expression} string_method=#{string_method}>"
16
+ end
17
+
18
+ def correct!(row) # edits row[column] in place; the base method skips rows that are not targeted
19
+ super(row) do
20
+ row[column].gsub!(matching_expression) { |match| match.send(string_method) }
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
data/lib/erratum/truncate.rb ADDED
@@ -0,0 +1,24 @@
1
+ class Errata
2
+ class Erratum
3
+ class Truncate < Erratum # Replaces the whole column value with a fixed prefix once the value is targeted.
4
+ attr_accessor :matching_expression, :necessary_and_sufficient_prefix
5
+
6
+ def initialize(errata, options = {}) # options[:x] is the prefix; a blank prefix raises
7
+ super
8
+ @necessary_and_sufficient_prefix = options[:x]
9
+ raise "necessary_and_sufficient_prefix cannot be blank" if @necessary_and_sufficient_prefix.blank?
10
+ set_matching_expression(options.merge(:prefix => true)) # :prefix is set here, on a merged copy of the options
11
+ end
12
+
13
+ def inspect
14
+ super + " matching_expression=#{matching_expression} necessary_and_sufficient_prefix=#{necessary_and_sufficient_prefix}>"
15
+ end
16
+
17
+ def correct!(row) # the base method skips rows that are not targeted
18
+ super(row) do
19
+ row[column] = necessary_and_sufficient_prefix
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
data/test/errata_test.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'test_helper'
2
+
3
+ class Brewery # Fixture class for the matching-method tests below, which are still unimplemented.
4
+ def self.is_microbrew?(row) # class-level predicate taking a row; true when row[:volume] is below 500
5
+ row[:volume] < 500
6
+ end
7
+ end
8
+
9
+ class ErrataTest < Test::Unit::TestCase # Exercises Errata against the published automobile errata file.
10
+ def setup # every test gets an Errata pointed at the remote CSV URL
11
+ @e = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv')
12
+ end
13
+
14
+ should "return implied matching methods" do # placeholder: flunks until a Brewery errata fixture exists
15
+ flunk "create a fake errata for Brewery"
16
+ end
17
+
18
+ should "use matching methods" do # placeholder: flunks until a Brewery errata fixture exists
19
+ flunk "create a fake errata for Brewery"
20
+ end
21
+
22
+ should "correct rows" do # the hashes are corrected in place, so the assertions read the same objects back
23
+ rover = { 'manufacturer_name' => 'foobar Austin Rover foobar' }
24
+ mercedes = { 'manufacturer_name' => 'MERCEDES' }
25
+ @e.correct!(mercedes)
26
+ @e.correct!(rover)
27
+ assert_equal 'Mercedes-Benz', mercedes['manufacturer_name']
28
+ assert_equal 'Rover', rover['manufacturer_name']
29
+ end
30
+
31
+ should "reject rows" do # expects the errata file to reject this manufacturer name
32
+ assert @e.rejects?('manufacturer_name' => 'AURORA CARS')
33
+ end
34
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'errata'
8
+
9
+ class Test::Unit::TestCase # reopened with an empty body: nothing is added here yet
10
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: errata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Seamus Abshere
8
+ - Andy Rossmeissl
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-02 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: remote_table
28
+ type: :runtime
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ version:
36
+ description: Correct strings based on remote errata files
37
+ email: seamus@abshere.net
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - LICENSE
44
+ - README.rdoc
45
+ files:
46
+ - .document
47
+ - .gitignore
48
+ - LICENSE
49
+ - README.rdoc
50
+ - Rakefile
51
+ - VERSION
52
+ - errata.gemspec
53
+ - lib/errata.rb
54
+ - lib/erratum.rb
55
+ - lib/erratum/delete.rb
56
+ - lib/erratum/reject.rb
57
+ - lib/erratum/replace.rb
58
+ - lib/erratum/simplify.rb
59
+ - lib/erratum/transform.rb
60
+ - lib/erratum/truncate.rb
61
+ - test/errata_test.rb
62
+ - test/test_helper.rb
63
+ has_rdoc: true
64
+ homepage: http://github.com/seamusabshere/errata
65
+ licenses: []
66
+
67
+ post_install_message:
68
+ rdoc_options:
69
+ - --charset=UTF-8
70
+ - --line-numbers
71
+ - --inline-source
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ version:
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: "0"
85
+ version:
86
+ requirements: []
87
+
88
+ rubyforge_project: errata
89
+ rubygems_version: 1.3.5
90
+ signing_key:
91
+ specification_version: 3
92
+ summary: Correct strings based on remote errata files
93
+ test_files:
94
+ - test/errata_test.rb
95
+ - test/test_helper.rb