RubyGems - jinx-migrate - Versions diffs - 2.1.1 - Mend

jinx-migrate 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

data/.gitignore +14 -0
data/.rspec +3 -0
data/.yardopts +1 -0
data/Gemfile +8 -0
data/Gemfile.lock +38 -0
data/History.md +6 -0
data/LEGAL +5 -0
data/LICENSE +22 -0
data/README.md +33 -0
data/Rakefile +40 -0
data/bin/csvjoin +24 -0
data/examples/family/README.md +24 -0
data/examples/family/conf/children/fields.yaml +2 -0
data/examples/family/conf/parents/defaults.yaml +3 -0
data/examples/family/conf/parents/fields.yaml +6 -0
data/examples/family/conf/parents/values.yaml +4 -0
data/examples/family/data/children.csv +1 -0
data/examples/family/data/parents.csv +1 -0
data/examples/family/lib/shims.rb +17 -0
data/jinx-migrate.gemspec +26 -0
data/lib/jinx/csv/csvio.rb +214 -0
data/lib/jinx/csv/joiner.rb +196 -0
data/lib/jinx/migration/filter.rb +167 -0
data/lib/jinx/migration/migratable.rb +244 -0
data/lib/jinx/migration/migrator.rb +1029 -0
data/lib/jinx/migration/reader.rb +16 -0
data/lib/jinx/migration/version.rb +5 -0
data/spec/bad/bad_spec.rb +25 -0
data/spec/bad/fields.yaml +1 -0
data/spec/bad/parents.csv +1 -0
data/spec/bad/shims.rb +16 -0
data/spec/csv/join/join_helper.rb +35 -0
data/spec/csv/join/join_spec.rb +100 -0
data/spec/csv/join/jumbled_src.csv +7 -0
data/spec/csv/join/jumbled_tgt.csv +7 -0
data/spec/csv/join/source.csv +7 -0
data/spec/csv/join/target.csv +7 -0
data/spec/extract/extract.rb +13 -0
data/spec/extract/extract_spec.rb +33 -0
data/spec/extract/fields.yaml +1 -0
data/spec/extract/parents.csv +1 -0
data/spec/family/child_spec.rb +27 -0
data/spec/family/family.rb +13 -0
data/spec/family/parent_spec.rb +57 -0
data/spec/filter/fields.yaml +1 -0
data/spec/filter/filter_spec.rb +20 -0
data/spec/filter/parents.csv +1 -0
data/spec/filter/values.yaml +4 -0
data/spec/primitive/children.csv +1 -0
data/spec/primitive/fields.yaml +4 -0
data/spec/primitive/primitive_spec.rb +24 -0
data/spec/skip/fields.yaml +1 -0
data/spec/skip/parents.csv +1 -0
data/spec/skip/skip_spec.rb +17 -0
data/spec/spec_helper.rb +17 -0
data/spec/support/model.rb +7 -0
data/spec/unique/fields.yaml +1 -0
data/spec/unique/parent.rb +6 -0
data/spec/unique/parents.csv +1 -0
data/spec/unique/shims.rb +10 -0
data/spec/unique/unique_spec.rb +20 -0
data/test/fixtures/csv/data/empty.csv +1 -0
data/test/fixtures/csv/data/variety.csv +1 -0
data/test/lib/csv/csvio_test.rb +74 -0
metadata +206 -0

data/.gitignore ADDED Viewed

@@ -0,0 +1,14 @@
+.DS_Store
+*~
+*.pdf
+.project
+.loadpath
+.yardoc
+*.gem
+*.tar*
+**/ext/bin
+**/classes
+/doc/api
+**log
+/test/results

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--backtrace
+--format Fuubar
+--color

data/.yardopts ADDED Viewed

	@@ -0,0 +1 @@
1	+ -o doc/api --private --protected - History.md LEGAL LICENSE

data/Gemfile ADDED Viewed

@@ -0,0 +1,8 @@
+source :rubygems
+gemspec
+group :development do
+  # Uncomment to use the local development project.
+  gem 'jinx', :path => File.dirname(__FILE__) + '/../core'
+  gem 'jinx-migrate', :path => File.dirname(__FILE__)
+end

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,38 @@
+PATH
+  remote: .
+  specs:
+    jinx-migrate (2.1.1)
+      bundler
+      fastercsv
+      rack
+PATH
+  remote: /Users/loneyf/workspace/jinx/core
+  specs:
+    jinx (2.1.1)
+      bundler
+GEM
+  remote: http://rubygems.org/
+  specs:
+    diff-lcs (1.1.3)
+    fastercsv (1.5.4)
+    rack (1.4.1)
+    rake (0.9.2.2)
+    rspec (2.9.0)
+      rspec-core (~> 2.9.0)
+      rspec-expectations (~> 2.9.0)
+      rspec-mocks (~> 2.9.0)
+    rspec-core (2.9.0)
+    rspec-expectations (2.9.1)
+      diff-lcs (~> 1.1.3)
+    rspec-mocks (2.9.0)
+PLATFORMS
+  java
+DEPENDENCIES
+  jinx!
+  jinx-migrate!
+  rake
+  rspec (>= 2.6)

data/History.md ADDED Viewed

@@ -0,0 +1,6 @@
+This history lists major release themes. See the GitHub commits (https://github.com/jinx/migrate)
+for change details.
+2.1.1 / 2012-04-13
+------------------
+* Initial public release spun off from caruby/core.

data/LEGAL ADDED Viewed

@@ -0,0 +1,5 @@
+LEGAL NOTICE INFORMATION
+------------------------
+All the files in this distribution are covered under the MIT
+license (see the file LICENSE).

data/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2012 Oregon Health & Science University
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,33 @@
+Jinx Migrator
+=============
+**Home**:         [http://github.com/jinx/migrate](http://github.com/jinx/migrate)
+**Git**:          [http://github.com/jinx/migrate](http://github.com/jinx/migrate)
+**Author**:       OHSU Knight Cancer Institute
+**Copyright**:    2012
+**License**:      MIT License
+Synopsis
+--------
+The Jinx Migrator migrates input data into a [Jinx](http://github.com/jinx/core) target.
+Installing
+----------
+The Jinx Migrator is installed as a JRuby gem:
+    [sudo] jgem install jinx-migrate
+Usage
+-----
+1. Enable Jinx for a Java package, as described in the [Jinx](http://github.com/jinx/core) Usage.
+2. Configure the input -> target mapping.
+3. Run the migrator.
+See the [Family](http://github.com/jinx/migrate/tree/master/examples/family) example for a sample migration.
+Copyright
+---------
+Jinx &copy; 2012 by [Oregon Health & Science University](http://www.ohsu.edu/xd/health/services/cancer/index.cfm).
+Jinx is licensed under the MIT license. Please see the LICENSE and LEGAL files for more information.

data/Rakefile ADDED Viewed

@@ -0,0 +1,40 @@
+require File.dirname(__FILE__) + '/lib/jinx/migration/version'
+# the gem name
+GEM = 'jinx-migrate'
+GEM_VERSION = Jinx::Migrate::VERSION
+WINDOWS = (Config::CONFIG['host_os'] =~ /mingw|win32|cygwin/ ? true : false) rescue false
+SUDO = WINDOWS ? '' : 'sudo'
+desc 'Default: run the specs'
+task :default => :spec
+desc "Builds the gem"
+task :gem do
+  sh "jgem build #{GEM}.gemspec"
+end
+desc "Installs the gem"
+task :install => :gem do
+  sh "#{SUDO} jgem install #{GEM}-#{GEM_VERSION}.gem"
+end
+desc 'Documents the API'
+task :doc do
+  FileUtils.rm_rf 'doc/api'
+  sh 'yardoc'
+end
+desc 'Runs the spec tests'
+task :spec do
+  Dir['spec/**/*_spec.rb'].each { |f| sh "rspec #{f}" rescue nil }
+end
+desc 'Runs the unit tests'
+task :unit do
+  Dir['test/**/*_test.rb'].each { |f| sh "jruby #{f}" rescue nil }
+end
+desc 'Runs all tests'
+task :test => [:spec, :unit]

data/bin/csvjoin ADDED Viewed

@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+#
+# csvjoin: joins two CSV files on their common fields
+#
+# Add the migrate lib to the path.
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'rubygems'
+require 'jinx'
+require 'jinx/csv/csvio'
+require 'jinx/cli/command'
+specs = [
+  [:to, '--to TARGET', 'The join target input file (default stdin)'],
+  [:as, '--as OUTPUT', 'The joined output file (default stdout)'],
+  [:source, 'SOURCE', 'The join source input file']
+]
+Jinx::CLI::Command.new(specs).start do |opts|
+  Jinx::CsvIO.join(opts.delete(:source), opts)
+end
+exit 0

data/examples/family/README.md ADDED Viewed

@@ -0,0 +1,24 @@
+Family migration example
+========================
+Synopsis
+--------
+This directory contains the Jinx migration Family example.
+The Family example demonstrates how to load the content of a source CSV file into
+a Family data store. The use cases illustrate several common migration impediments:
+* Different source-destination terminology
+* Different source-destination associations
+* Incomplete input
+* Denormalized input
+* Inconsistent input
+* Input data scrubbing
+Migration
+---------
+The example migration input data resides in the `data` directory.
+Each `parents` CSV input file holds one row for each parent.
+Each `children` CSV input file holds one row for each child.
+Each input file has a corresponding migration mapping configuration in the `conf` directory.

data/examples/family/conf/children/fields.yaml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ Parent: Parent.name
2	+ Child: Child.name

data/examples/family/conf/parents/defaults.yaml ADDED Viewed

@@ -0,0 +1,3 @@
+# This defaults configuration file demonstrates how to set a default property value in
+# the migrated record.
+Household.address.state: IL

data/examples/family/conf/parents/fields.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+Name: Parent.name
+Street: Household.address.street1
+City: Household.address.city
+Zip: Household.address.postal_code
+Spouse: Parent.spouse.name

data/examples/family/conf/parents/values.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+# This value filter configuration file demonstrates how to transform an input field value
+# to a migrated value. 'Street' is abbreviated to 'St'.
+Address.street1:
+  /^(.* St)reet(.*)$/ : "$1$2"

data/examples/family/data/children.csv ADDED Viewed

	@@ -0,0 +1 @@
1	+ Parent,Child

data/examples/family/data/parents.csv ADDED Viewed

	@@ -0,0 +1 @@
1	+ Name,Street,City,Zip,Spouse

data/examples/family/lib/shims.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module Family
+  # Declares the classes modified for migration.
+  shims Parent
+  class Parent
+    # Augments the migration by setting the spouse household.
+    #
+    # @param [{Symbol => Object}] row the input row field => value hash
+    # @param [<Resource>] migrated the migrated instances
+    def migrate(row, migrated)
+      super
+      if spouse then
+        spouse.household = migrated.detect { |m| Household === m }
+      end
+    end
+  end
+end

data/jinx-migrate.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+require File.dirname(__FILE__) + '/lib/jinx/migration/version'
+require 'date'
+Gem::Specification.new do |s|
+  s.name          = 'jinx-migrate'
+  s.summary       = 'Jinx JSON plug-in.'
+  s.description   = s.summary + '. See github.com/jinx/migrate for more information.'
+  s.version       = Jinx::Migrate::VERSION
+  s.date          = Date.today
+  s.author        = 'OHSU'
+  s.email         = "jinx.ruby@gmail.com"
+  s.homepage      = 'http://github.com/jinx/migrate'
+  s.require_path  = 'lib'
+  s.bindir        = 'bin'
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files spec`.split("\n")
+  s.executables   = `git ls-files bin`.split("\n").map{ |f| File.basename(f) }
+  s.add_runtime_dependency     'rack'
+  s.add_runtime_dependency     'bundler'
+  s.add_runtime_dependency     'fastercsv'
+  s.add_development_dependency 'rake'
+  s.add_development_dependency 'rspec', '>= 2.6'
+  s.has_rdoc      = 'yard'
+  s.license       = 'MIT'
+  s.rubyforge_project = 'jinx'
+end

data/lib/jinx/csv/csvio.rb ADDED Viewed

@@ -0,0 +1,214 @@
+require 'fileutils'
+require 'faster_csv'
+require 'jinx/helpers/options'
+require 'jinx/helpers/collections'
+require 'jinx/csv/joiner'
+module Jinx
+  # CsvIO reads or writes CSV records.
+  # This class wraps a FasterCSV with the following modifications:
+  # * relax the date parser to allow dd/mm/yyyy dates
+  # * don't convert integer text with a leading zero to an octal number
+  # * allow one custom converter with different semantics: if the converter block
+  #   call returns nil, then continue conversion, otherwise return the converter
+  #   result. This differs from FasterCSV converter semantics, which calls converters
+  #   as long as the result equals the input field value. The CsvIO converter semantics
+  #   supports converters that intend a String result to be the converted result.
+  #
+  # CsvIO is Enumerable, but does not implement the complete Ruby IO interface.
+  class CsvIO
+    include Enumerable
+    # @return [<String>] the CSV field names
+    attr_reader :field_names
+    # @return [<Symbol>] the CSV field value accessor
+    attr_reader :accessors
+    alias :headers :accessors
+    # Opens the CSV file and calls the given block with this CsvIO as the argument.
+    #
+    # @param (see #initialize)
+    # @option (see #initialize)
+    # @yield [csvio] the optional block to execute
+    # @yieldparam [CsvIO] csvio the open CSVIO instance
+    def self.open(dev, opts=nil)
+      csvio = new(dev, opts)
+      if block_given? then
+        begin
+          yield csvio
+        ensure
+          csvio.close
+        end
+      end
+    end
+    # Opens the given CSV file and calls {#each} with the given block.
+    #
+    # @param (see #initialize)
+    # @option (see #initialize)
+    # @yield [row] the block to execute on the row
+    # @yieldparam [{Symbol => Object}] row the field symbol => value hash
+    def self.foreach(file, opts=nil, &block)
+      open(file, opts) { |csvio| csvio.each(&block) }
+    end
+    # Joins the source to the target and writes the output. The match is on all fields
+    # held in common. If there is more than one match, then all but the first match has
+    # empty values for the merged fields. Both files must be sorted in order of the
+    # common fields, sequenced by their occurrence in the source header.
+    #
+    # @param [String, IO] source the join source file
+    # @param [{Symbol => String, IO, <String>}] opts the join options
+    # @option opts [String, IO] :to the join target file name or device (default stdin)
+    # @option opts [<String>] :for the target field names (default all target fields)
+    # @option opts [String, IO] :as the output file name or device (default stdout)
+    # @yield (see Csv::Joiner#join)
+    # @yieldparam (see Csv::Joiner#join)
+    def self.join(source, opts, &block)
+      flds = opts[:for] || Array::EMPTY_ARRAY
+      Csv::Joiner.new(source, opts[:to], opts[:as]).join(*flds, &block)
+    end
+    # Creates a new CsvIO for the specified source file.
+    # If a converter block is given, then it is added to the CSV converters list.
+    #
+    # @param [String, IO] dev the CSV file or stream to open
+    # @param [Hash] opts the open options
+    # @option opts [String] :mode the input mode (default +r+)
+    # @option opts [String] :headers the input field headers
+    # @yield [value, info] converts the input value
+    # @yieldparam [String] value the input value
+    # @yieldparam info the current field's FasterCSV FieldInfo metadata
+    # @raise [ArgumentError] if the input is nil
+    def initialize(dev, opts=nil, &converter)
+      raise ArgumentError.new("CSV input argument is missing") if dev.nil?
+      # the CSV file open mode
+      mode = Options.get(:mode, opts, 'r')
+      # the CSV headers option; can be boolean or array
+      hdr_opt = Options.get(:headers, opts)
+      # there is a header record by default for an input CSV file
+      hdr_opt ||= true if mode =~ /^r/
+      # make parent directories if necessary for an output CSV file
+      File.makedirs(File.dirname(dev)) if String == dev and mode =~ /^w/
+      # if headers aren't given, then convert the input CSV header record names to underscore symbols
+      hdr_cvtr = :symbol unless Enumerable === hdr_opt
+      # make a custom converter
+      custom = Proc.new { |value, info| convert(value, info, &converter) }
+      # collect the options
+      csv_opts = {:headers => hdr_opt, :header_converters => hdr_cvtr, :return_headers => true, :write_headers => true, :converters => custom}
+      # Make the parent directory if necessary.
+      FileUtils.mkdir_p(File.dirname(dev)) if String === dev and mode !~ /^r/
+      # open the CSV file
+      @csv = String === dev ? FasterCSV.open(dev, mode, csv_opts) : FasterCSV.new(dev, csv_opts)
+      # the header => field name hash:
+      # if the header option is set to true, then read the input header line.
+      # otherwise, parse an empty string which mimics an input header line.
+      hdr_row = case hdr_opt
+      when true then
+        @csv.shift
+      when Enumerable then
+        ''.parse_csv(:headers => hdr_opt, :header_converters => :symbol, :return_headers => true)
+      else
+        Jinx.fail(ArgumentError, "CSV headers option value not supported: #{hdr_opt}")
+      end
+      # The field value accessors consist of the header row headers converted to a symbol.
+      @accessors = hdr_row.headers
+      # The field names consist of the header row values.
+      @field_names = @accessors.map { |sym| hdr_row[sym] }
+      # the header name => symbol map
+      @hdr_sym_hash = hdr_row.to_hash.invert
+    end
+    # Closes the CSV file.
+    def close
+      @csv.close
+    end
+    # @param [String] name the CSV field header name
+    # @return [Symbol] the header accessor method
+    def accessor(name)
+      @hdr_sym_hash[name]
+    end
+    # Iterates over each CSV row, yielding a row for each iteration.
+    #
+    # @yield [row] processes the CSV row
+    # @yieldparam [FasterCSV::Row] row the CSV row
+    def each(&block)
+      @csv.each(&block)
+    end
+    # Reads the next CSV row.
+    #
+    # @return the next CSV row
+    # @see #each
+    def readline
+      @csv.shift
+    end
+    alias :shift :readline
+    alias :next :readline
+    # Writes the given row to the CSV file.
+    #
+    # @param [{Symbol => Object}] row the input row
+    def write(row)
+      @csv << row
+      @csv.flush
+    end
+    alias :<< :write
+    private
+    # 3-letter months => month sequence hash.
+    MMM_MM_MAP = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].to_compact_hash_with_index do |mmm, index|
+      index < 9 ? ('0' + index.succ.to_s) : index.succ.to_s
+    end
+    # DateMatcher relaxes the FasterCSV DateMatcher to allow dd/mm/yyyy dates.
+    DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{1,2}-\w{3}-\d{2,4} | \d{4}[-\/]\d{1,2}[-\/]\d{1,2} | \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} )\z /x
+    DD_MMM_YYYY_RE = /^(\d{1,2})-([[:alpha:]]{3})-(\d{2,4})$/
+    # @param f the input field value to convert
+    # @param info the CSV field info
+    # @return the converted value
+    def convert(f, info)
+      return if f.nil?
+      # the block has precedence
+      value = yield(f, info) if block_given?
+      # integer conversion
+      value ||= Integer(f) if f =~ /^[1-9]\d*$/
+      # date conversion
+      value ||= convert_date(f) if f =~ CsvIO::DateMatcher
+      # float conversion
+      value ||= (Float(f) rescue f) if f =~ /^\d+\.\d*$/ or f =~ /^\d*\.\d+$/
+      # return converted value or the input field if there was no conversion
+      value || f
+    end
+    # @param [String] f the input field value
+    # @return [Date, nil] the converted date, or nil if the value cannot be parsed as a date
+    def convert_date(f)
+      # If input value is in dd-mmm-yy format, then reformat.
+      # Otherwise, parse as a Date if possible.
+      if f =~ DD_MMM_YYYY_RE then
+        ddmmyy = reformat_dd_mmm_yy_date(f) || return
+        convert_date(ddmmyy)
+      else
+        Date.parse(f, true) rescue nil
+      end
+    end
+    # @param [String] f the input field value in dd-mmm-yy format
+    # @return [String, nil] the reformatted date String in mm/dd/yy format, or nil if the
+    #   month abbreviation is not recognized
+    def reformat_dd_mmm_yy_date(f)
+      dd, mmm, yy = DD_MMM_YYYY_RE.match(f).captures
+      mm = MMM_MM_MAP[mmm.downcase] || return
+      "#{mm}/#{dd}/#{yy}"
+    end
+  end
+end