RubyGems - imw - Versions diffs - 0.1.1 → 0.2.0 - Mend

imw 0.1.1 → 0.2.0

Files changed (143) hide show

data/.gitignore +4 -1
data/Rakefile +10 -0
data/TODO +18 -0
data/VERSION +1 -1
data/bin/imw +1 -1
data/etc/imwrc.rb +0 -50
data/examples/dataset.rb +12 -0
data/lib/imw/boot.rb +55 -9
data/lib/imw/dataset/paths.rb +15 -24
data/lib/imw/dataset/workflow.rb +131 -72
data/lib/imw/dataset.rb +94 -186
data/lib/imw/parsers/html_parser.rb +1 -1
data/lib/imw/parsers.rb +1 -1
data/lib/imw/repository.rb +3 -27
data/lib/imw/resource.rb +190 -0
data/lib/imw/resources/archive.rb +97 -0
data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
data/lib/imw/resources/archives_and_compressed.rb +32 -0
data/lib/imw/resources/compressed_file.rb +89 -0
data/lib/imw/resources/compressible.rb +77 -0
data/lib/imw/resources/formats/delimited.rb +92 -0
data/lib/imw/resources/formats/excel.rb +125 -0
data/lib/imw/resources/formats/json.rb +53 -0
data/lib/imw/resources/formats/sgml.rb +72 -0
data/lib/imw/resources/formats/yaml.rb +53 -0
data/lib/imw/resources/formats.rb +32 -0
data/lib/imw/resources/local.rb +198 -0
data/lib/imw/resources/remote.rb +110 -0
data/lib/imw/resources/schemes/hdfs.rb +242 -0
data/lib/imw/resources/schemes/http.rb +161 -0
data/lib/imw/resources/schemes/s3.rb +137 -0
data/lib/imw/resources/schemes.rb +19 -0
data/lib/imw/resources.rb +118 -0
data/lib/imw/runner.rb +5 -4
data/lib/imw/transforms/archiver.rb +215 -0
data/lib/imw/transforms/transferer.rb +103 -0
data/lib/imw/transforms.rb +8 -0
data/lib/imw/utils/error.rb +26 -30
data/lib/imw/utils/extensions/array.rb +5 -15
data/lib/imw/utils/extensions/hash.rb +6 -16
data/lib/imw/utils/extensions/hpricot.rb +0 -14
data/lib/imw/utils/extensions/string.rb +5 -15
data/lib/imw/utils/extensions/symbol.rb +0 -13
data/lib/imw/utils/extensions.rb +65 -0
data/lib/imw/utils/log.rb +14 -13
data/lib/imw/utils/misc.rb +0 -6
data/lib/imw/utils/paths.rb +101 -42
data/lib/imw/utils/version.rb +8 -9
data/lib/imw/utils.rb +2 -18
data/lib/imw.rb +92 -17
data/spec/data/sample.csv +1 -1
data/spec/data/sample.json +1 -0
data/spec/data/sample.tsv +1 -1
data/spec/data/sample.txt +1 -1
data/spec/data/sample.xml +1 -1
data/spec/data/sample.yaml +1 -1
data/spec/imw/dataset/paths_spec.rb +32 -0
data/spec/imw/dataset/workflow_spec.rb +41 -0
data/spec/imw/resource_spec.rb +79 -0
data/spec/imw/resources/archive_spec.rb +69 -0
data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
data/spec/imw/resources/compressed_file_spec.rb +48 -0
data/spec/imw/resources/compressible_spec.rb +36 -0
data/spec/imw/resources/formats/delimited_spec.rb +33 -0
data/spec/imw/resources/formats/json_spec.rb +32 -0
data/spec/imw/resources/formats/sgml_spec.rb +24 -0
data/spec/imw/resources/formats/yaml_spec.rb +41 -0
data/spec/imw/resources/local_spec.rb +98 -0
data/spec/imw/resources/remote_spec.rb +35 -0
data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
data/spec/imw/resources/schemes/http_spec.rb +19 -0
data/spec/imw/resources/schemes/s3_spec.rb +19 -0
data/spec/imw/transforms/archiver_spec.rb +120 -0
data/spec/imw/transforms/transferer_spec.rb +113 -0
data/spec/imw/utils/paths_spec.rb +5 -33
data/spec/imw/utils/shared_paths_spec.rb +29 -0
data/spec/spec_helper.rb +5 -5
data/spec/support/paths_matcher.rb +67 -0
data/spec/support/random.rb +39 -36
metadata +88 -75
data/lib/imw/dataset/task.rb +0 -41
data/lib/imw/files/archive.rb +0 -113
data/lib/imw/files/basicfile.rb +0 -122
data/lib/imw/files/binary.rb +0 -28
data/lib/imw/files/compressed_file.rb +0 -93
data/lib/imw/files/compressed_files_and_archives.rb +0 -334
data/lib/imw/files/compressible.rb +0 -103
data/lib/imw/files/csv.rb +0 -113
data/lib/imw/files/directory.rb +0 -62
data/lib/imw/files/excel.rb +0 -84
data/lib/imw/files/json.rb +0 -41
data/lib/imw/files/sgml.rb +0 -46
data/lib/imw/files/text.rb +0 -68
data/lib/imw/files/yaml.rb +0 -46
data/lib/imw/files.rb +0 -125
data/lib/imw/packagers/archiver.rb +0 -126
data/lib/imw/packagers/s3_mover.rb +0 -36
data/lib/imw/packagers.rb +0 -8
data/lib/imw/utils/components.rb +0 -61
data/lib/imw/utils/config.rb +0 -46
data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
data/lib/imw/utils/extensions/core.rb +0 -27
data/lib/imw/utils/extensions/dir.rb +0 -24
data/lib/imw/utils/extensions/file_core.rb +0 -64
data/lib/imw/utils/extensions/typed_struct.rb +0 -22
data/lib/imw/utils/extensions/uri.rb +0 -59
data/lib/imw/utils/view/dump_csv.rb +0 -112
data/lib/imw/utils/view/dump_csv_older.rb +0 -117
data/lib/imw/utils/view.rb +0 -113
data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
data/spec/imw/files/archive_spec.rb +0 -118
data/spec/imw/files/basicfile_spec.rb +0 -121
data/spec/imw/files/bz2_spec.rb +0 -32
data/spec/imw/files/compressed_file_spec.rb +0 -96
data/spec/imw/files/compressible_spec.rb +0 -100
data/spec/imw/files/file_spec.rb +0 -144
data/spec/imw/files/gz_spec.rb +0 -32
data/spec/imw/files/rar_spec.rb +0 -33
data/spec/imw/files/tar_spec.rb +0 -31
data/spec/imw/files/text_spec.rb +0 -23
data/spec/imw/files/zip_spec.rb +0 -31
data/spec/imw/files_spec.rb +0 -38
data/spec/imw/packagers/archiver_spec.rb +0 -125
data/spec/imw/packagers/s3_mover_spec.rb +0 -7
data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
data/spec/imw/utils/extensions/find_spec.rb +0 -113
data/spec/imw/workflow/rip/local_spec.rb +0 -89
data/spec/imw/workflow/rip_spec.rb +0 -27
data/spec/support/archive_contents_matcher.rb +0 -94
data/spec/support/directory_contents_matcher.rb +0 -61

data/lib/imw/utils/extensions/file_core.rb DELETED Viewed

@@ -1,64 +0,0 @@
-#
-# h2. lib/imw/utils/extensions/file.rb -- extensions to built-in file class
-#
-# == About
-#
-# Author::    (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
-# Copyright:: Copyright (c) 2008 infochimps.org
-# License::   GPL 3.0
-# Website::   http://infinitemonkeywrench.org/
-#
-require 'imw/utils/error'
-require 'imw/utils/config'
-require 'imw/utils/extensions/string'
-class File
-  # Returns the basename of the given path with its extension removed:
-  #
-  #   File.name_of_file("/path/to/somefile.txt") #=> "somefile"
-  def self.name_of_file path
-    basename(path)[0,basename(path).length - extname(path).length]
-  end
-  # Returns what would be the handle of a source or dataset
-  # described by a file at +path+:
-  #
-  #   File.handle "/path/to/a_particular_dataset.instructions.yaml"  #=> :a_particular_dataset
-  def self.handle path
-    File.basename(path).split('.').first.handle
-  end
-  # Returns a unique (non-existing) version of the given +path+ by
-  # appending successive integers, useful for copying files into
-  # directories without clobbering existing files (a la <tt>wget
-  # -nc</tt>).
-  #
-  # In a directory <tt>/path/to</tt> without a file named
-  # <tt>data.txt</tt>
-  #
-  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
-  #
-  # If <tt>data.txt</tt> were to already exist in that directory, then
-  #
-  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
-  #
-  # If <tt>data.txt.1</tt> were to already exist then
-  #
-  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
-  #
-  # and so on.
-  def self.uniquify path
-    orig_path = path.clone
-    copy_number = 1
-    while exist? path do
-      path = orig_path + ".#{copy_number}"
-      copy_number += 1
-    end
-    path
-  end
-end
-# puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet.  It makes you feel happier when you have to sort through them." # at bottom

data/lib/imw/utils/extensions/typed_struct.rb DELETED Viewed

@@ -1,22 +0,0 @@
-#
-# A Struct whose attributes may each have an associated conversion
-# method; calling remap! applies those conversions in place.
-#
-#
-class TypedStruct < Struct
-  def self.new attrs, convs
-    struct = super *attrs
-    struct_attr_convs = Hash.zip(attrs, convs).reject{|a,t| t.nil? }
-    struct.class_eval do
-      cattr_accessor :attr_convs
-      self.attr_convs = struct_attr_convs
-      def remap!
-        attr_convs.each do |attr, conv|
-          curr = self.send(attr)
-          self.send("#{attr}=", curr.send(conv)) if curr.respond_to?(conv)
-        end
-      end
-    end # class_eval
-    struct
-  end
-end

data/lib/imw/utils/extensions/uri.rb DELETED Viewed

@@ -1,59 +0,0 @@
-#
-# h2. lib/imw/utils/extensions/uri.rb -- extensions to uri module
-#
-# == About
-#
-# Some useful extensions to the +URI+ module.
-#
-# Author::    (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
-# Copyright:: Copyright (c) 2008 infochimps.org
-# License::   GPL 3.0
-# Website::   http://infinitemonkeywrench.org/
-#
-require 'uri'
-module URI
-  # List of prefixes ignored when returning domains (or reversed
-  # domains).
-  IGNORED_PREFIXES = ['www']
-  # Returns the domain of the given URI, first scrubbing it of any
-  # prefixes we can ignore.
-  def self.domain(uri)
-      uriobj = self.parse(uri)
-    if uriobj.host then
-      host = uriobj.host
-    elsif uriobj.path then
-      host = uriobj.path.split('/')[0]
-    else
-      raise ArgumentError, "Invalid URI: #{uri}"
-    end
-    # remove any ignored prefixes from the hostname (e.g. 'www')
-    parts = host.split('.')
-    parts = (IGNORED_PREFIXES.member?(parts[0]) ? parts[1...parts.size] : parts)
-    host = parts.join('.')
-    host
-  end
-  # Returns the reversed domain of the given URI, first scrubbing it of
-  # any prefixes we can ignore.  Will not reverse numeric addresses of
-  # the form 127.0.0.1
-  def self.reverse_domain(uri)
-    begin
-      d = self.domain(uri)
-      # check for numeric ip
-      # in a TERRIBLE way that needs to be fixed!
-      if d=~/^[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*$/ then
-        return d
-      else
-        return d.split('.').reverse.join('.')
-      end
-    rescue URI::InvalidURIError,ArgumentError
-      raise $!
-    end
-  end
-end
-# puts "#{File.basename(__FILE__)}: In the end, it's either you or I." # at bottom

data/lib/imw/utils/view/dump_csv.rb DELETED Viewed

@@ -1,112 +0,0 @@
-# #
-# # views
-# #
-# require 'imw/view/db_infochimps'
-#
-#
-# This is where views of the metadata will go (right now it's all just
-# sitting in a crapheap within model.rb).
-#
-# we'll have routines for
-#
-# - dumping/undumping to yaml
-# - dumping/undumping to files that load right into the ics database.
-#
-class IMWObject
-  def self.from_icss(hsh)
-    # lists of dumpables
-    self._attr_objlists.each do |attr, cl|
-      if (vals = hsh.delete(attr.to_s))
-        hsh[attr] = vals.map{ |val| cl.from_icss(val) }
-      end
-    end
-    # simply dumpable objects
-    self._attr_objs.each do |attr, cl|
-      if (val = hsh.delete(attr.to_s))
-        hsh[attr] = cl.from_icss(val)
-      end
-    end
-    self.new(hsh)
-  end
-  # Dump as a plain hash
-  def to_icss()
-    hsh = instance_values
-    # lists of dumpable objects
-    self.class._attr_objlists.keys.map(&:to_s).each do |attr|
-      hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
-    end
-    # simply dumpable objects
-    self.class._attr_objs.keys.map(&:to_s).each do |attr|
-      (v=hsh.delete attr) && hsh[attr] = v.to_icss
-    end
-    hsh
-  end
-  # Pivot from object to relational view
-  def to_csv(parent_id=nil)
-    tables   = {}
-    sub_ids  = []
-    my_cl    = self.class.to_s
-    self.class._attr_objs.sort.each do |attr, cl|
-      tables[attr] ||= [] ; tables[attr].push(self[attr].to_csv(id))
-      join = "%s_%s" % [my_cl, cl.to_s].sort
-      tables[join] ||= [] ; tables[join].push(id, self[attr].id)
-      sub_ids.push self[attr].handle
-    end
-    self.class._attr_objlists.sort.each do |attr, cl|
-      tables[attr] ||= []
-      join = "%s_%s" % [my_cl, cl.to_s].sort
-      tables[join] ||= []
-      self[attr].each do |obj|
-        tables[attr].push(obj.to_csv(id))
-        tables[join].push(id, obj.id)
-        sub_ids.push obj.handle
-      end
-    end
-    tables[self.class.to_s] = [
-      [self.id, parent_id].compact   +
-      slice(self.class._attr_scalars.keys - [:id]) +
-      sub_ids
-    ]
-    tables
-  end
-end
-class Note  < IMWObject
-  def to_pair()
-    { self.handle => self.desc }
-  end
-  def to_icss()
-    to_pair
-  end
-  def self.from_icss(pair)
-    self.new Hash.zip([:handle,:desc], pair.to_pair)
-  end
-end
-class TagList
-  def self.from_icss(str)
-    self.from(str)
-  end
-  def to_icss()
-    self.to_s
-  end
-  def to_csv(parent_id=nil)
-    [self.to_s]
-  end
-  def handle() to_s end
-end
-# You acquire the vision of a sharp-eyed tanzier.  We'll just assume that's good.

data/lib/imw/utils/view/dump_csv_older.rb DELETED Viewed

@@ -1,117 +0,0 @@
-# #
-# # views
-# #
-# require 'imw/view/db_infochimps'
-#
-#
-# This is where views of the metadata will go (right now it's all just
-# sitting in a crapheap within model.rb).
-#
-# we'll have routines for
-#
-# - dumping/undumping to yaml
-# - dumping/undumping to files that load right into the ics database.
-#
-class IMWBase
-  def self.from_icss(hsh)
-    # simply dumpable objects
-    self._attr_has_one.map(&:to_s).each do |attr|
-      if (val = hsh.delete(attr.to_s))
-        hsh[attr] = get_attr_class(attr).from_icss(val)
-      end
-    end
-    # lists of dumpables
-    self._attr_manys.each do |attr|
-      if (vals = hsh.delete(attr.to_s))
-        hsh[attr] = vals.map{ |val| get_attr_class(attr).from_icss(val) }
-      end
-    end
-    self.new(hsh)
-  end
-  # Dump as a plain hash
-  def to_icss()
-    hsh = instance_values
-    # simply dumpable objects
-    self.class._attr_has_one.map(&:to_s).each do |attr|
-      (v=hsh.delete attr) && hsh[attr] = v.to_icss
-    end
-    # lists of dumpable objects
-    self.class._attr_manys.each do |attr|
-      hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
-    end
-    hsh
-  end
-  # Pivot from object to relational view
-  def to_csv(parent_id=nil)
-    tables   = {}
-    sub_ids  = []
-    my_cl    = self.class.to_s
-    self.class._attr_has_one.map(&:to_s).sort.each do |attr|
-      # Banks the object
-      obj = self[attr]
-      cl  = self.class.get_attr_class(attr).to_s
-      tables[attr] ||= [] ; tables[attr].push( obj.to_csv(id) )
-      # tie the parent and child together
-      join = "%s_%s" % [my_cl, cl].sort
-      tables[join] ||= [] ; tables[join].push( [id, obj.id] )
-      sub_ids.push obj.handle
-    end
-    self.class._attr_manys.sort.each do |attr|
-      objs = self[attr] or next
-      cl   = self.class.get_attr_class(attr).to_s
-      tables[attr] ||= []
-      join = "%s_%s" % [my_cl, cl.to_s].sort
-      tables[join] ||= []
-      objs.each do |obj|
-        tables[attr].push(obj.to_csv(id))
-        tables[join].push(id, obj.id)
-        sub_ids.push obj.handle
-      end
-    end
-    tables[self.class.to_s] = [
-      [self.id, parent_id].compact   +
-      slice(self.class._attr_scalar - [:id]) +
-      sub_ids
-    ].zip(['id', 'pid']+(self.class._attr_scalar - [:id])+self.class._attr_has_one.map(&:to_s).sort)
-    tables
-  end
-end
-class Note  < IMWBase
-  # { :format_name => {}, ... } -- must be a hash
-  def to_pair()
-    { self.handle => self.desc }
-  end
-  def to_icss()
-    to_pair
-  end
-  def self.from_icss(pair)
-    self.new Hash.zip([:handle,:desc], pair.to_pair)
-  end
-end
-class TagList
-  def self.from_icss(str)
-    self.from(str)
-  end
-  def to_icss()
-    self.to_s
-  end
-  def to_csv(parent_id=nil)
-    [self.to_s]
-  end
-  def handle() to_s end
-end

data/lib/imw/utils/view.rb DELETED Viewed

@@ -1,113 +0,0 @@
-class ActiveRecord::Base
-  class << self
-  end
-  # def merge!(hsh)
-  #   hsh = hsh.dup
-  #   # puts hsh.to_yaml
-  #   # has_many datasets, notes, fields, contributors
-  #   self.class.reflect_on_all_associations.each do |ass|
-  #     # ["@macro", "@class_name", "@name", "@primary_key_name", "@options",
-  #     #  "@klass",
-  #     #  "@through_reflection",
-  #     #  "@active_record",
-  #     puts [ass.name, ass.macro, ass.primary_key_name].to_yaml
-  #     if ass.macro == :has_many
-  #       els = hsh.delete(ass.name.to_s) || []
-  #       puts "!!!!!!!!!!!!!!!!!!!!!!!!!!", els, '!!'
-  #       els.each do |el|
-  #         puts el
-  #         self[ass.name] = ass.klass.new().merge!(el)
-  #       end
-  #     end
-  #     hsh.each do |key,val|
-  #       self[key] = val
-  #     end
-  #     p self
-  #     p self.datasets if self.respond_to? 'datasets'
-  #   end
-  # end
-  def undump(hsh)
-    puts "unumping from #{hsh.to_json}"
-    hsh.each{ |k,v| self[k] = v }
-    self.save!
-    self
-  end
-end
-class Pool < ActiveRecord::Base
-  def undump(hsh)
-    { :datasets => Dataset, :fields => Field,
-      :contributors => Contributor, :pool_notes => PoolNote }.each do |field, klass|
-      vals = hsh.delete(field.to_s) || []
-      puts "Undumping #{vals} info #{field}"
-      self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
-    end
-    super
-    self
-  end
-end
-class Dataset < ActiveRecord::Base
-  def undump(hsh)
-    { :datasets => Dataset, :fields => Field,
-      :contributors => Contributor, :dataset_notes => DatasetNote }.each do |field, klass|
-      vals = hsh.delete(field.to_s) || []
-      puts "Undumping #{vals} info #{field}"
-      self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
-    end
-    super
-    puts "Got Dataset #{self.to_yaml}"
-    self
-  end
-end
-class IMW < OpenStruct
-  #
-  # Takes an Infochimps Stupid Schema stream and
-  # constructs the corresponding objects.
-  #
-  # Here are the rules:
-  # * the schema has the structure
-  #   # this has to be first.
-  #   - infochimps_schema:
-  #       schema_version:     0.2  # in case stuff changes
-  #   # then any number of imw objects:
-  #   - pool:         (...)
-  #       fields:         [era, innings_pitched,
-  #   - dataset:      (...)
-  #       fields:
-  #         - name:       Earned Run Average
-  #           handle:   era
-  #           concept:    baseball-era
-  #           units:      earned_runs / (9*innings_pitched)
-  #   - contributor:  (...)
-  #   - field:        (...)
-  #
-  # * Objects are referred to by __handle__, *NOT* __id__. If an ID is
-  #   included, and an object exists with a non-matching ID or handle,
-  #   an error will be raised.
-  #
-  # * We want to make the schema files maintainable by hand, which means that
-  #   the loader tries to be smart about inline-defined objects.  That is, you
-  #   can either refer to (via handle) a field defined elsewhere, or you can
-  #   define the field in whole, and trust that the Right Thing will
-  #   happen. This presents the problem of collisions, though. If a bulk object
-  #   update arrives, we need to know whom to believe -- bulk loader or
-  #   database.  In the absence of versioning: we look up the object by its
-  #   handle.  If there's an existing object, any new information (fields with
-  #   values in new that are blank in old) is added to it.  If the object is
-  #   defined at the top level, it wins; if the object is defined as a sub field
-  #   it loses.
-  #
-  # * Every interesting object (Pool, Dataset, Contributor, Field) has a desc:
-  #   attribute (for Pool and Dataset it's virtual but never mind) to describe
-  #   __itself__.  Additionally, every interesting relationship has its own desc: field.
-  #
-  def self.undump(schema)
-    # compact then merge -- kill off blank
-  end
-end

data/spec/imw/dataset/datamapper/uri_spec.rb DELETED Viewed

@@ -1,43 +0,0 @@
-require File.join(File.dirname(__FILE__),'../../../spec_helper')
-require File.join(File.dirname(__FILE__),'../datamapper_spec_helper')
-include IMW
-require 'imw/dataset/datamapper/uri'
-if IMW::SpecConfig::TEST_WITH_DATAMAPPER
-  IMW::SpecConfig.setup_datamapper_test_db
-  describe IMW do
-    before(:each) do
-      DM_URI.all.each do |u| u.destroy  end
-    end
-    it "makes a URI from a barely complete string" do
-      DM_URI.find_or_create_from_url('google.com')
-      u = DM_URI.first
-      u.should_not be_nil
-      u.host.should == 'google.com'
-    end
-    it "behaves as normalized" do
-      DM_URI.find_or_create_from_url('google.com')
-      u = DM_URI.first
-      u.path.should   == '/'
-      u.scheme.should == 'http'
-      u.port.should   be_nil
-    end
-    it "makes a complicated URI from a complicated string" do
-      DM_URI.find_or_create_from_url('http://me:and@your.mom.com:69/what?orly=yarly&ok=then')
-      dm_uri = DM_URI.first({
-          :scheme => 'http', :host => 'your.mom.com', :port => '69',
-          :query => 'what?orly=yarly&ok=then'
-        })
-    end
-    # it converts to a file path
-    # it doesn't leave a trailing / on the file path
-    # it escapes unicode URLs
-    # it escapes non-URL chars in URL
-  end
-end

data/spec/imw/dataset/datamapper_spec_helper.rb DELETED Viewed

@@ -1,11 +0,0 @@
-require 'imw/dataset/datamapper'
-module IMW::SpecConfig
-  def self.setup_datamapper_test_db
-    IMW::Dataset.setup_remote_connection IMW::DEFAULT_DATABASE_CONNECTION_PARAMS.merge({
-        :dbname => 'imw_dataset_datamapper_test' })
-    DataMapper.auto_migrate!
-  end
-end

data/spec/imw/files/archive_spec.rb DELETED Viewed

@@ -1,118 +0,0 @@
-#
-# h2. spec/imw/model/files/archive_spec.rb -- module for use in testing various archive formats
-#
-# == About
-#
-# The <tt>IMW::Files::Archive</tt> module doesn't implement any
-# functionality of its own but merely adds methods to an including
-# class.  Appropriately, this spec file implements a shared example
-# group ("an archive of files") which can be included
-# by the spec of an archive class.  This spec must also define the
-# following instance variables:
-#
-# <tt>@archive</tt>:: a subclass of <tt>IMW::Files::BasicFile</tt> which
-# has the <tt>IMW::Files::Archive</tt> module mixed in.
-#
-# <tt>@root_directory</tt>: a string specifying the path where all the
-# files will be created
-#
-# <tt>@initial_directory</tt>: a string specifying the path where some
-# files for the initial creation of the archive will be created.
-#
-# <tt>@appending_directory</tt>: a string specifying the path where
-# some files for appending to the archive will be created.
-#
-# <tt>@extraction_directory</tt>: a string specifying the path where
-# the archive's files will be extracted.
-#
-# Author::    (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
-# Copyright:: Copyright (c) 2008 infochimps.org
-# License::   GPL 3.0
-# Website::   http://infinitemonkeywrench.org/
-#
-require File.join(File.dirname(__FILE__),'../../../spec_helper')
-require IMW_SPEC_DIR+'/imw/matchers/archive_contents_matcher'
-require IMW_SPEC_DIR+'/imw/matchers/directory_contents_matcher'
-require 'imw/utils/random'
-require 'imw/utils/extensions/find'
-share_examples_for "an archive of files" do
-  include Spec::Matchers::IMW
-  def create_random_files
-    IMW::Random.directory_with_files(@initial_directory)
-    IMW::Random.directory_with_files(@appending_directory)
-    FileUtils.mkdir(@extraction_directory)
-  end
-  def delete_random_files
-    FileUtils.rm_rf [@root_directory,@extraction_directory]
-  end
-  before(:each) do
-    create_random_files
-  end
-  after(:each) do
-    delete_random_files
-    FileUtils.rm(@archive.path) if @archive.exist?
-  end
-  describe "(listing)" do
-    it "should raise an error when listing a non-existent archive" do
-      lambda { @archive.contents }.should raise_error(IMW::Error)
-    end
-  end
-  describe "(creation)" do
-    it "should be able to create archives which match a directory's structure" do
-      @archive.create(@initial_directory + "/*")
-      @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
-    end
-    it "should raise an error if trying to overwrite an archive without the :force option" do
-      @archive.create(@initial_directory + "/*")
-      lambda { @archive.create(@initial_directory + "/*") }.should raise_error(IMW::Error)
-    end
-    it "should overwrite an archive if the :force option is given" do
-      @archive.create(@initial_directory + "/*")
-      @archive.create(@initial_directory + "/*", :force => true)
-      @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
-    end
-  end
-  describe "(appending)" do
-    it "should append to an archive which already exists" do
-      @archive.create(@initial_directory + "/*")
-      @archive.append(@appending_directory + "/*")
-      @archive.should contain_paths_like([@initial_directory,@appending_directory], :relative_to => @root_directory)
-    end
-    it "should append to an archive which doesn't already exist" do
-      @archive.append(@appending_directory + "/*")
-      @archive.should contain_paths_like(@appending_directory, :relative_to => @root_directory)
-    end
-  end
-  describe "(extracting)" do
-    it "should raise an error when trying to extract from a non-existing archive" do
-      lambda { @archive.extract }.should raise_error(IMW::Error)
-    end
-    it "should extract files which match the original ones it archived" do
-      @archive.create(@initial_directory + "/*")
-      @archive.append(@appending_directory + "/*")
-      new_archive = @archive.cp(@extraction_directory + '/' + @archive.basename)
-      new_archive.extract
-      @extraction_directory.should contain_files_matching_directory(@root_directory)
-    end
-  end
-end unless defined? IMW_FILES_ARCHIVE_SHARED_SPEC
-# puts "#{File.basename(__FILE__)}: How many drunken frat boys can fit in an Internet kiosk?" # at bottom