imw 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,64 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/utils/extensions/file.rb -- extensions to built-in file class
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
-
# License:: GPL 3.0
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
10
|
-
#
|
11
|
-
|
12
|
-
require 'imw/utils/error'
|
13
|
-
require 'imw/utils/config'
|
14
|
-
require 'imw/utils/extensions/string'
|
15
|
-
|
16
|
-
class File
|
17
|
-
|
18
|
-
# Returns the name of the path given:
|
19
|
-
#
|
20
|
-
# File.name_of_file("/path/to/somefile.txt") => "somefile".
|
21
|
-
def self.name_of_file path
|
22
|
-
basename(path)[0,basename(path).length - extname(path).length]
|
23
|
-
end
|
24
|
-
|
25
|
-
# Returns what would be the handle of a source or dataset
|
26
|
-
# described by a file at +path+:
|
27
|
-
#
|
28
|
-
# File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
|
29
|
-
def self.handle path
|
30
|
-
File.basename(path).split('.').first.handle
|
31
|
-
end
|
32
|
-
|
33
|
-
# Returns a unique (non-existing) version of the given +path+ by
|
34
|
-
# appending successive integers, useful for copying files into
|
35
|
-
# directories without clobbering existing files (a la <tt>wget
|
36
|
-
# -nc</tt>).
|
37
|
-
#
|
38
|
-
# In a directory <tt>/path/to</tt> without a file named
|
39
|
-
# <tt>data.txt</tt>
|
40
|
-
#
|
41
|
-
# File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
|
42
|
-
#
|
43
|
-
# If <tt>data.txt</tt> were to already exist in that directory, then
|
44
|
-
#
|
45
|
-
# File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
|
46
|
-
#
|
47
|
-
# If <tt>data.txt.1</tt> were to already exist then
|
48
|
-
#
|
49
|
-
# File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
|
50
|
-
#
|
51
|
-
# and so on.
|
52
|
-
def self.uniquify path
|
53
|
-
orig_path = path.clone
|
54
|
-
copy_number = 1
|
55
|
-
while exist? path do
|
56
|
-
path = orig_path + ".#{copy_number}"
|
57
|
-
copy_number += 1
|
58
|
-
end
|
59
|
-
path
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
# puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# A struct
|
3
|
-
# but has an idea of what type attributes should be
|
4
|
-
#
|
5
|
-
#
|
6
|
-
class TypedStruct < Struct
|
7
|
-
def self.new attrs, convs
|
8
|
-
struct = super *attrs
|
9
|
-
struct_attr_convs = Hash.zip(attrs, convs).reject{|a,t| t.nil? }
|
10
|
-
struct.class_eval do
|
11
|
-
cattr_accessor :attr_convs
|
12
|
-
self.attr_convs = struct_attr_convs
|
13
|
-
def remap!
|
14
|
-
attr_convs.each do |attr, conv|
|
15
|
-
curr = self.send(attr)
|
16
|
-
self.send("#{attr}=", curr.send(conv)) if curr.respond_to?(conv)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end # class_eval
|
20
|
-
struct
|
21
|
-
end
|
22
|
-
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. lib/imw/utils/extensions/uri.rb -- extensions to uri module
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Some useful extensions to the +URI+ module.
|
7
|
-
#
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
-
# License:: GPL 3.0
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
12
|
-
#
|
13
|
-
|
14
|
-
require 'uri'
|
15
|
-
|
16
|
-
module URI
|
17
|
-
|
18
|
-
# List of prefixes ignored when returning domains (or reversed
|
19
|
-
# domains).
|
20
|
-
IGNORED_PREFIXES = ['www']
|
21
|
-
|
22
|
-
# Returns the domain of the given URI, first scrubbing it of any
|
23
|
-
# prefixes we can ignore.
|
24
|
-
def self.domain(uri)
|
25
|
-
uriobj = self.parse(uri)
|
26
|
-
if uriobj.host then
|
27
|
-
host = uriobj.host
|
28
|
-
elsif uriobj.path then
|
29
|
-
host = uriobj.path.split('/')[0]
|
30
|
-
else
|
31
|
-
raise ArgumentError, "Invalid URI: #{uri}"
|
32
|
-
end
|
33
|
-
# remove any ignored prefixes from the hostname (i.e. - 'www')
|
34
|
-
parts = host.split('.')
|
35
|
-
parts = (IGNORED_PREFIXES.member?(parts[0]) ? parts[1...parts.size] : parts)
|
36
|
-
host = parts.join('.')
|
37
|
-
host
|
38
|
-
end
|
39
|
-
|
40
|
-
# Returns the reversed domain of the given URI, first scrubbing it of
|
41
|
-
# any prefixes we can ignore. Will not reverse numeric addresses of
|
42
|
-
# the form 127.0.0.1
|
43
|
-
def self.reverse_domain(uri)
|
44
|
-
begin
|
45
|
-
d = self.domain(uri)
|
46
|
-
# check for numeric ip
|
47
|
-
# in a TERRIBLE way that needs to be fixed!
|
48
|
-
if d=~/^[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*$/ then
|
49
|
-
return d
|
50
|
-
else
|
51
|
-
return d.split('.').reverse.join('.')
|
52
|
-
end
|
53
|
-
rescue URI::InvalidURIError,ArgumentError
|
54
|
-
raise $!
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# puts "#{File.basename(__FILE__)}: In the end, it's either you or I." # at bottom
|
@@ -1,112 +0,0 @@
|
|
1
|
-
# #
|
2
|
-
# # views
|
3
|
-
# #
|
4
|
-
# require 'imw/view/db_infochimps'
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
#
|
9
|
-
# This is where views of the metadata will go (right now it's all just
|
10
|
-
# sitting in a crapheap within model.rb).
|
11
|
-
#
|
12
|
-
# we'll have routines for
|
13
|
-
#
|
14
|
-
# - dumping/undumping to yaml
|
15
|
-
# - dumping/undumping to files that load right into the ics database.
|
16
|
-
#
|
17
|
-
class IMWObject
|
18
|
-
|
19
|
-
def self.from_icss(hsh)
|
20
|
-
# lists of dumpables
|
21
|
-
self._attr_objlists.each do |attr, cl|
|
22
|
-
if (vals = hsh.delete(attr.to_s))
|
23
|
-
hsh[attr] = vals.map{ |val| cl.from_icss(val) }
|
24
|
-
end
|
25
|
-
end
|
26
|
-
# simply dumpable objects
|
27
|
-
self._attr_objs.each do |attr, cl|
|
28
|
-
if (val = hsh.delete(attr.to_s))
|
29
|
-
hsh[attr] = cl.from_icss(val)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
self.new(hsh)
|
33
|
-
end
|
34
|
-
|
35
|
-
# Dump as a plain hash
|
36
|
-
def to_icss()
|
37
|
-
hsh = instance_values
|
38
|
-
# lists of dumpable objects
|
39
|
-
self.class._attr_objlists.keys.map(&:to_s).each do |attr|
|
40
|
-
hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
|
41
|
-
end
|
42
|
-
# simply dumpable objects
|
43
|
-
self.class._attr_objs.keys.map(&:to_s).each do |attr|
|
44
|
-
(v=hsh.delete attr) && hsh[attr] = v.to_icss
|
45
|
-
end
|
46
|
-
hsh
|
47
|
-
end
|
48
|
-
|
49
|
-
# Pivot from object to relational view
|
50
|
-
def to_csv(parent_id=nil)
|
51
|
-
tables = {}
|
52
|
-
sub_ids = []
|
53
|
-
my_cl = self.class.to_s
|
54
|
-
self.class._attr_objs.sort.each do |attr, cl|
|
55
|
-
tables[attr] ||= [] ; tables[attr].push(self[attr].to_csv(id))
|
56
|
-
join = "%s_%s" % [my_cl, cl.to_s].sort
|
57
|
-
tables[join] ||= [] ; tables[join].push(id, self[attr].id)
|
58
|
-
sub_ids.push self[attr].handle
|
59
|
-
end
|
60
|
-
|
61
|
-
self.class._attr_objlists.sort.each do |attr, cl|
|
62
|
-
tables[attr] ||= []
|
63
|
-
join = "%s_%s" % [my_cl, cl.to_s].sort
|
64
|
-
tables[join] ||= []
|
65
|
-
self[attr].each do |obj|
|
66
|
-
tables[attr].push(obj.to_csv(id))
|
67
|
-
tables[join].push(id, obj.id)
|
68
|
-
sub_ids.push obj.handle
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
tables[self.class.to_s] = [
|
74
|
-
[self.id, parent_id].compact +
|
75
|
-
slice(self.class._attr_scalars.keys - [:id]) +
|
76
|
-
sub_ids
|
77
|
-
]
|
78
|
-
tables
|
79
|
-
end
|
80
|
-
|
81
|
-
end
|
82
|
-
|
83
|
-
class Note < IMWObject
|
84
|
-
def to_pair()
|
85
|
-
{ self.handle => self.desc }
|
86
|
-
end
|
87
|
-
def to_icss()
|
88
|
-
to_pair
|
89
|
-
end
|
90
|
-
def self.from_icss(pair)
|
91
|
-
self.new Hash.zip([:handle,:desc], pair.to_pair)
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
|
96
|
-
class TagList
|
97
|
-
def self.from_icss(str)
|
98
|
-
self.from(str)
|
99
|
-
end
|
100
|
-
def to_icss()
|
101
|
-
self.to_s
|
102
|
-
end
|
103
|
-
def to_csv(parent_id=nil)
|
104
|
-
[self.to_s]
|
105
|
-
end
|
106
|
-
|
107
|
-
def handle() to_s end
|
108
|
-
end
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
# You acquire the vision of a sharp-eyed tanzier. We'll just assume that's good.
|
@@ -1,117 +0,0 @@
|
|
1
|
-
# #
|
2
|
-
# # views
|
3
|
-
# #
|
4
|
-
# require 'imw/view/db_infochimps'
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
#
|
9
|
-
# This is where views of the metadata will go (right now it's all just
|
10
|
-
# sitting in a crapheap within model.rb).
|
11
|
-
#
|
12
|
-
# we'll have routines for
|
13
|
-
#
|
14
|
-
# - dumping/undumping to yaml
|
15
|
-
# - dumping/undumping to files that load right into the ics database.
|
16
|
-
#
|
17
|
-
class IMWBase
|
18
|
-
|
19
|
-
def self.from_icss(hsh)
|
20
|
-
# simply dumpable objects
|
21
|
-
self._attr_has_one.map(&:to_s).each do |attr|
|
22
|
-
if (val = hsh.delete(attr.to_s))
|
23
|
-
hsh[attr] = get_attr_class(attr).from_icss(val)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
# lists of dumpables
|
27
|
-
self._attr_manys.each do |attr|
|
28
|
-
if (vals = hsh.delete(attr.to_s))
|
29
|
-
hsh[attr] = vals.map{ |val| get_attr_class(attr).from_icss(val) }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
self.new(hsh)
|
33
|
-
end
|
34
|
-
|
35
|
-
# Dump as a plain hash
|
36
|
-
def to_icss()
|
37
|
-
hsh = instance_values
|
38
|
-
# simply dumpable objects
|
39
|
-
self.class._attr_has_one.map(&:to_s).each do |attr|
|
40
|
-
(v=hsh.delete attr) && hsh[attr] = v.to_icss
|
41
|
-
end
|
42
|
-
# lists of dumpable objects
|
43
|
-
self.class._attr_manys.each do |attr|
|
44
|
-
hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
|
45
|
-
end
|
46
|
-
hsh
|
47
|
-
end
|
48
|
-
|
49
|
-
# Pivot from object to relational view
|
50
|
-
def to_csv(parent_id=nil)
|
51
|
-
tables = {}
|
52
|
-
sub_ids = []
|
53
|
-
my_cl = self.class.to_s
|
54
|
-
self.class._attr_has_one.map(&:to_s).sort.each do |attr|
|
55
|
-
# Banks the object
|
56
|
-
obj = self[attr]
|
57
|
-
cl = self.class.get_attr_class(attr).to_s
|
58
|
-
tables[attr] ||= [] ; tables[attr].push( obj.to_csv(id) )
|
59
|
-
# tie the parent and child together
|
60
|
-
join = "%s_%s" % [my_cl, cl].sort
|
61
|
-
tables[join] ||= [] ; tables[join].push( [id, obj.id] )
|
62
|
-
sub_ids.push obj.handle
|
63
|
-
end
|
64
|
-
|
65
|
-
self.class._attr_manys.sort.each do |attr|
|
66
|
-
objs = self[attr] or next
|
67
|
-
cl = self.class.get_attr_class(attr).to_s
|
68
|
-
tables[attr] ||= []
|
69
|
-
join = "%s_%s" % [my_cl, cl.to_s].sort
|
70
|
-
tables[join] ||= []
|
71
|
-
objs.each do |obj|
|
72
|
-
tables[attr].push(obj.to_csv(id))
|
73
|
-
tables[join].push(id, obj.id)
|
74
|
-
sub_ids.push obj.handle
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
tables[self.class.to_s] = [
|
80
|
-
[self.id, parent_id].compact +
|
81
|
-
slice(self.class._attr_scalar - [:id]) +
|
82
|
-
sub_ids
|
83
|
-
].zip(['id', 'pid']+(self.class._attr_scalar - [:id])+self.class._attr_has_one.map(&:to_s).sort)
|
84
|
-
tables
|
85
|
-
end
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
class Note < IMWBase
|
90
|
-
# { :format_name => {}, ... } -- must be a hash
|
91
|
-
def to_pair()
|
92
|
-
{ self.handle => self.desc }
|
93
|
-
end
|
94
|
-
def to_icss()
|
95
|
-
to_pair
|
96
|
-
end
|
97
|
-
def self.from_icss(pair)
|
98
|
-
self.new Hash.zip([:handle,:desc], pair.to_pair)
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
class TagList
|
104
|
-
def self.from_icss(str)
|
105
|
-
self.from(str)
|
106
|
-
end
|
107
|
-
def to_icss()
|
108
|
-
self.to_s
|
109
|
-
end
|
110
|
-
def to_csv(parent_id=nil)
|
111
|
-
[self.to_s]
|
112
|
-
end
|
113
|
-
|
114
|
-
def handle() to_s end
|
115
|
-
end
|
116
|
-
|
117
|
-
|
data/lib/imw/utils/view.rb
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
|
2
|
-
class ActiveRecord::Base
|
3
|
-
class << self
|
4
|
-
end
|
5
|
-
# def merge!(hsh)
|
6
|
-
# hsh = hsh.dup
|
7
|
-
# # puts hsh.to_yaml
|
8
|
-
# # has_many datasets, notes, fields, contributors
|
9
|
-
# self.class.reflect_on_all_associations.each do |ass|
|
10
|
-
# # ["@macro", "@class_name", "@name", "@primary_key_name", "@options",
|
11
|
-
# # "@klass",
|
12
|
-
# # "@through_reflection",
|
13
|
-
# # "@active_record",
|
14
|
-
# puts [ass.name, ass.macro, ass.primary_key_name].to_yaml
|
15
|
-
# if ass.macro == :has_many
|
16
|
-
# els = hsh.delete(ass.name.to_s) || []
|
17
|
-
# puts "!!!!!!!!!!!!!!!!!!!!!!!!!!", els, '!!'
|
18
|
-
# els.each do |el|
|
19
|
-
# puts el
|
20
|
-
# self[ass.name] = ass.klass.new().merge!(el)
|
21
|
-
# end
|
22
|
-
# end
|
23
|
-
# hsh.each do |key,val|
|
24
|
-
# self[key] = val
|
25
|
-
# end
|
26
|
-
# p self
|
27
|
-
# p self.datasets if self.respond_to? 'datasets'
|
28
|
-
# end
|
29
|
-
# end
|
30
|
-
def undump(hsh)
|
31
|
-
puts "unumping from #{hsh.to_json}"
|
32
|
-
hsh.each{ |k,v| self[k] = v }
|
33
|
-
self.save!
|
34
|
-
self
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class Pool < ActiveRecord::Base
|
39
|
-
def undump(hsh)
|
40
|
-
{ :datasets => Dataset, :fields => Field,
|
41
|
-
:contributors => Contributor, :pool_notes => PoolNote }.each do |field, klass|
|
42
|
-
vals = hsh.delete(field.to_s) || []
|
43
|
-
puts "Undumping #{vals} info #{field}"
|
44
|
-
self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
|
45
|
-
end
|
46
|
-
super
|
47
|
-
self
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class Dataset < ActiveRecord::Base
|
52
|
-
def undump(hsh)
|
53
|
-
{ :datasets => Dataset, :fields => Field,
|
54
|
-
:contributors => Contributor, :dataset_notes => DatasetNote }.each do |field, klass|
|
55
|
-
vals = hsh.delete(field.to_s) || []
|
56
|
-
puts "Undumping #{vals} info #{field}"
|
57
|
-
self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
|
58
|
-
end
|
59
|
-
super
|
60
|
-
puts "Got Dataset #{self.to_yaml}"
|
61
|
-
self
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
class IMW < OpenStruct
|
66
|
-
|
67
|
-
#
|
68
|
-
# Takes an Infochimps Stupid Schema stream and
|
69
|
-
# constructs the corresponding objects.
|
70
|
-
#
|
71
|
-
# Here are the rules:
|
72
|
-
# * the schema has the structure
|
73
|
-
# # this has to be first.
|
74
|
-
# - infochimps_schema:
|
75
|
-
# schema_version: 0.2 # in case stuff changes
|
76
|
-
# # then any number of imw objects:
|
77
|
-
# - pool: (...)
|
78
|
-
# fields: [era, innings_pitched,
|
79
|
-
# - dataset: (...)
|
80
|
-
# fields:
|
81
|
-
# - name: Earned Run Average
|
82
|
-
# handle: era
|
83
|
-
# concept: baseball-era
|
84
|
-
# units: earned_runs / (9*innings_pitched)
|
85
|
-
# - contributor: (...)
|
86
|
-
# - field: (...)
|
87
|
-
#
|
88
|
-
# * Objects are referred to by __handle__, *NOT* __id__. If an ID is
|
89
|
-
# included, and an object exists with a non-matching ID or handle,
|
90
|
-
# an error will be raised.
|
91
|
-
#
|
92
|
-
# * We want to make the schema files maintainable by hand, which means that
|
93
|
-
# the loader tries to be smart about inline-defined objects. That is, you
|
94
|
-
# can either refer to (via handle) a field defined elsewhere, or you can
|
95
|
-
# define the field in whole, and trust that the Right Thing will
|
96
|
-
# happen. This presents the problem of collisions, though. If a bulk object
|
97
|
-
# update arrives, we need to know whom to believe -- bulk loader or
|
98
|
-
# database. In the absence of versioning: we look up the object by its
|
99
|
-
# handle. If there's an existing object, any new information (fields with
|
100
|
-
# values in new that are blank in old) is added to it. If the object is
|
101
|
-
# defined at the top level, it wins; if the object is defined as a sub field
|
102
|
-
# it loses.
|
103
|
-
#
|
104
|
-
# * Every interesting object (Pool, Dataset, Contributor, Field) has a desc:
|
105
|
-
# attribute (for Pool and Dataset it's virtual but never mind) to describe
|
106
|
-
# __itself__. Additionally, every interesting relationship has its own desc: field.
|
107
|
-
#
|
108
|
-
|
109
|
-
def self.undump(schema)
|
110
|
-
|
111
|
-
# compact then merge -- kill off blank
|
112
|
-
end
|
113
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
2
|
-
require File.join(File.dirname(__FILE__),'../datamapper_spec_helper')
|
3
|
-
include IMW
|
4
|
-
require 'imw/dataset/datamapper/uri'
|
5
|
-
|
6
|
-
if IMW::SpecConfig::TEST_WITH_DATAMAPPER
|
7
|
-
IMW::SpecConfig.setup_datamapper_test_db
|
8
|
-
describe IMW do
|
9
|
-
|
10
|
-
before(:each) do
|
11
|
-
DM_URI.all.each do |u| u.destroy end
|
12
|
-
end
|
13
|
-
|
14
|
-
it "makes a URI from a barely complete string" do
|
15
|
-
DM_URI.find_or_create_from_url('google.com')
|
16
|
-
u = DM_URI.first
|
17
|
-
u.should_not be_nil
|
18
|
-
u.host.should == 'google.com'
|
19
|
-
end
|
20
|
-
|
21
|
-
it "behaves as normalized" do
|
22
|
-
DM_URI.find_or_create_from_url('google.com')
|
23
|
-
u = DM_URI.first
|
24
|
-
u.path.should == '/'
|
25
|
-
u.scheme.should == 'http'
|
26
|
-
u.port.should be_nil
|
27
|
-
end
|
28
|
-
|
29
|
-
it "makes a complicated URI from a complicated string" do
|
30
|
-
DM_URI.find_or_create_from_url('http://me:and@your.mom.com:69/what?orly=yarly&ok=then')
|
31
|
-
dm_uri = DM_URI.first({
|
32
|
-
:scheme => 'http', :host => 'your.mom.com', :port => '69',
|
33
|
-
:query => 'what?orly=yarly&ok=then'
|
34
|
-
})
|
35
|
-
end
|
36
|
-
|
37
|
-
# it converts to a file path
|
38
|
-
# it doesn't leave a trailing / on the file path
|
39
|
-
# it escapes unicode URLs
|
40
|
-
# it escapes non-URL chars in URL
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
require 'imw/dataset/datamapper'
|
2
|
-
|
3
|
-
module IMW::SpecConfig
|
4
|
-
|
5
|
-
def self.setup_datamapper_test_db
|
6
|
-
IMW::Dataset.setup_remote_connection IMW::DEFAULT_DATABASE_CONNECTION_PARAMS.merge({
|
7
|
-
:dbname => 'imw_dataset_datamapper_test' })
|
8
|
-
DataMapper.auto_migrate!
|
9
|
-
end
|
10
|
-
|
11
|
-
end
|
@@ -1,118 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/imw/model/files/archive_spec.rb -- module for use in testing various archive formats
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# The <tt>IMW::Files::Archive</tt> module doesn't implement any
|
7
|
-
# functionality of its own but merely adds methods to an including
|
8
|
-
# class. Appropriately, this spec file implements a shared example
|
9
|
-
# group ("an archive of files") which can be included
|
10
|
-
# by the spec of an archive class. This spec must also define the
|
11
|
-
# following instance variables:
|
12
|
-
#
|
13
|
-
# <tt>@archive</tt>:: a subclass of <tt>IMW::Files::BasicFile</tt> which
|
14
|
-
# has the <tt>IMW::Files::Archive</tt> module mixed in.
|
15
|
-
#
|
16
|
-
# <tt>@root_directory</tt>: a string specifying the path where all the
|
17
|
-
# files will be created
|
18
|
-
#
|
19
|
-
# <tt>@initial_directory</tt>: a string specifying the path where some
|
20
|
-
# files for the initial creation of the archive will be created.
|
21
|
-
#
|
22
|
-
# <tt>@appending_directory</tt>: a string specifying the path where
|
23
|
-
# some files for appending to the archive will be created.
|
24
|
-
#
|
25
|
-
# <tt>@extraction_directory</tt>: a string specifying the path where
|
26
|
-
# the archive's files will be extracted.
|
27
|
-
#
|
28
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
29
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
30
|
-
# License:: GPL 3.0
|
31
|
-
# Website:: http://infinitemonkeywrench.org/
|
32
|
-
#
|
33
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
34
|
-
require IMW_SPEC_DIR+'/imw/matchers/archive_contents_matcher'
|
35
|
-
require IMW_SPEC_DIR+'/imw/matchers/directory_contents_matcher'
|
36
|
-
|
37
|
-
require 'imw/utils/random'
|
38
|
-
require 'imw/utils/extensions/find'
|
39
|
-
share_examples_for "an archive of files" do
|
40
|
-
include Spec::Matchers::IMW
|
41
|
-
|
42
|
-
def create_random_files
|
43
|
-
IMW::Random.directory_with_files(@initial_directory)
|
44
|
-
IMW::Random.directory_with_files(@appending_directory)
|
45
|
-
FileUtils.mkdir(@extraction_directory)
|
46
|
-
end
|
47
|
-
|
48
|
-
def delete_random_files
|
49
|
-
FileUtils.rm_rf [@root_directory,@extraction_directory]
|
50
|
-
end
|
51
|
-
|
52
|
-
before(:each) do
|
53
|
-
create_random_files
|
54
|
-
end
|
55
|
-
|
56
|
-
after(:each) do
|
57
|
-
delete_random_files
|
58
|
-
FileUtils.rm(@archive.path) if @archive.exist?
|
59
|
-
end
|
60
|
-
|
61
|
-
describe "(listing)" do
|
62
|
-
it "should raise an error when listing a non-existent archive" do
|
63
|
-
lambda { @archive.contents }.should raise_error(IMW::Error)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
describe "(creation)" do
|
68
|
-
|
69
|
-
it "should be able to create archives which match a directory's structure" do
|
70
|
-
@archive.create(@initial_directory + "/*")
|
71
|
-
@archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
|
72
|
-
end
|
73
|
-
|
74
|
-
it "should raise an error if trying to overwrite an archive without the :force option" do
|
75
|
-
@archive.create(@initial_directory + "/*")
|
76
|
-
lambda { @archive.create(@initial_directory + "/*") }.should raise_error(IMW::Error)
|
77
|
-
end
|
78
|
-
|
79
|
-
it "should overwrite an archive if the :force option is given" do
|
80
|
-
@archive.create(@initial_directory + "/*")
|
81
|
-
@archive.create(@initial_directory + "/*", :force => true)
|
82
|
-
@archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
describe "(appending)" do
|
87
|
-
|
88
|
-
it "should append to an archive which already exists" do
|
89
|
-
@archive.create(@initial_directory + "/*")
|
90
|
-
@archive.append(@appending_directory + "/*")
|
91
|
-
@archive.should contain_paths_like([@initial_directory,@appending_directory], :relative_to => @root_directory)
|
92
|
-
end
|
93
|
-
|
94
|
-
it "should append to an archive which doesn't already exist" do
|
95
|
-
@archive.append(@appending_directory + "/*")
|
96
|
-
@archive.should contain_paths_like(@appending_directory, :relative_to => @root_directory)
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
describe "(extracting)" do
|
102
|
-
|
103
|
-
it "should raise an error when trying to extract from a non-existing archive" do
|
104
|
-
lambda { @archive.extract }.should raise_error(IMW::Error)
|
105
|
-
end
|
106
|
-
|
107
|
-
it "should extract files which match the original ones it archived" do
|
108
|
-
@archive.create(@initial_directory + "/*")
|
109
|
-
@archive.append(@appending_directory + "/*")
|
110
|
-
new_archive = @archive.cp(@extraction_directory + '/' + @archive.basename)
|
111
|
-
new_archive.extract
|
112
|
-
@extraction_directory.should contain_files_matching_directory(@root_directory)
|
113
|
-
end
|
114
|
-
|
115
|
-
end
|
116
|
-
end unless defined? IMW_FILES_ARCHIVE_SHARED_SPEC
|
117
|
-
|
118
|
-
# puts "#{File.basename(__FILE__)}: How many drunken frat boys can fit in an Internet kiosk?" # at bottom
|