imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/file.rb -- extensions to built-in file class
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
|
|
12
|
-
require 'imw/utils/error'
|
|
13
|
-
require 'imw/utils/config'
|
|
14
|
-
require 'imw/utils/extensions/string'
|
|
15
|
-
|
|
16
|
-
class File

  # Returns the basename of +path+ with its (last) extension removed:
  #
  #   File.name_of_file("/path/to/somefile.txt") #=> "somefile"
  #   File.name_of_file("/path/to/archive.tar.gz") #=> "archive.tar"
  def self.name_of_file path
    base = basename(path)
    base[0, base.length - extname(path).length]
  end

  # Returns what would be the handle of a source or dataset
  # described by a file at +path+:
  #
  #   File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
  #
  # Takes everything before the first "." of the basename and calls
  # +handle+ on that string.
  # NOTE(review): String#handle is not a core method; presumably it
  # comes from 'imw/utils/extensions/string' -- confirm.
  def self.handle path
    File.basename(path).split('.').first.handle
  end

  # Returns a unique (non-existing) version of the given +path+ by
  # appending successive integers, useful for copying files into
  # directories without clobbering existing files (a la <tt>wget
  # -nc</tt>).
  #
  # In a directory <tt>/path/to</tt> without a file named
  # <tt>data.txt</tt>
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
  #
  # If <tt>data.txt</tt> were to already exist in that directory, then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
  #
  # If <tt>data.txt.1</tt> were to already exist then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
  #
  # and so on.
  #
  # A +path+ that does not exist is returned untouched (same object).
  # Otherwise a new String is returned; the path is converted with
  # +to_s+ first so that a Pathname gets a ".N" suffix rather than a
  # new path segment (Pathname#+ joins segments).
  #
  # The check and any later file creation are separate steps, so two
  # concurrent callers can be handed the same name.
  def self.uniquify path
    return path unless exist?(path)
    base        = path.to_s
    copy_number = 1
    copy_number += 1 while exist?("#{base}.#{copy_number}")
    "#{base}.#{copy_number}"
  end

end
|
|
63
|
-
|
|
64
|
-
# puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# A struct
|
|
3
|
-
# but has an idea of what type attributes should be
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
class TypedStruct < Struct
  # Builds a new Struct class whose members are +attrs+ and which
  # remembers, per member, the name of a conversion method.
  #
  # attrs:: array of member names
  # convs:: array of conversion method names, position-matched to
  #         +attrs+; a +nil+ entry means "leave this member alone"
  #
  # The returned class exposes the member => conversion hash as
  # +attr_convs+ (readable and writable on both the class and its
  # instances) and gains an instance method +remap!+.
  #
  # The pairing is done with Array#zip and the hash is stored in a
  # class variable on the generated class, so no Hash.zip or
  # cattr_accessor extension is needed.
  # NOTE(review): assumes Hash.zip paired keys with values by
  # position, as Array#zip does -- confirm against the old extension.
  def self.new attrs, convs
    struct      = super(*attrs)
    conversions = Hash[attrs.zip(convs).reject { |_attr, conv| conv.nil? }]
    struct.class_variable_set(:@@attr_convs, conversions)
    struct.class_eval do
      # Class-level accessors (singleton methods of the generated class).
      def self.attr_convs
        class_variable_get(:@@attr_convs)
      end

      def self.attr_convs=(value)
        class_variable_set(:@@attr_convs, value)
      end

      # Instance-level accessors delegate to the class.
      def attr_convs
        self.class.attr_convs
      end

      def attr_convs=(value)
        self.class.attr_convs = value
      end

      # Replaces each member that has a conversion with the result of
      # sending that conversion to its current value. Members whose
      # current value does not respond to the conversion are skipped.
      def remap!
        attr_convs.each do |attr, conv|
          curr = send(attr)
          send("#{attr}=", curr.send(conv)) if curr.respond_to?(conv)
        end
      end
    end # class_eval
    struct
  end
end
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils/extensions/uri.rb -- extensions to uri module
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Some useful extensions to the +URI+ module.
|
|
7
|
-
#
|
|
8
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
-
# License:: GPL 3.0
|
|
11
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
-
#
|
|
13
|
-
|
|
14
|
-
require 'uri'
|
|
15
|
-
|
|
16
|
-
module URI

  # List of prefixes ignored when returning domains (or reversed
  # domains).
  IGNORED_PREFIXES = ['www']

  # Returns the domain of the given URI, first scrubbing it of any
  # prefixes we can ignore.
  #
  #   URI.domain("http://www.google.com/search") #=> "google.com"
  #   URI.domain("google.com/search")            #=> "google.com"
  #
  # When the parsed URI has no host, the first "/"-separated segment
  # of its path is used instead.
  #
  # Raises ArgumentError when neither yields a host (for example an
  # empty string), and lets URI::InvalidURIError from +parse+
  # propagate.
  def self.domain(uri)
    uriobj = parse(uri)
    host   = uriobj.host || uriobj.path.to_s.split('/').first
    raise ArgumentError, "Invalid URI: #{uri}" if host.nil?

    # remove any ignored prefixes from the hostname (i.e. - 'www')
    parts = host.split('.')
    parts = parts[1..-1] if IGNORED_PREFIXES.member?(parts.first)
    parts.join('.')
  end

  # Returns the reversed domain of the given URI, first scrubbing it of
  # any prefixes we can ignore. Will not reverse numeric addresses of
  # the form 127.0.0.1
  #
  #   URI.reverse_domain("http://www.infochimps.org") #=> "org.infochimps"
  #   URI.reverse_domain("http://127.0.0.1/")         #=> "127.0.0.1"
  #
  # Errors raised by +domain+ propagate unchanged.
  def self.reverse_domain(uri)
    d = domain(uri)
    # Dotted quad: four runs of at least one digit, anchored to the
    # whole string. This is only a shape check -- octet ranges are not
    # validated.
    return d if d =~ /\A\d+(?:\.\d+){3}\z/
    d.split('.').reverse.join('.')
  end
end
|
|
58
|
-
|
|
59
|
-
# puts "#{File.basename(__FILE__)}: In the end, it's either you or I." # at bottom
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
# #
|
|
2
|
-
# # views
|
|
3
|
-
# #
|
|
4
|
-
# require 'imw/view/db_infochimps'
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#
|
|
9
|
-
# This is where views of the metadata will go (right now it's all just
|
|
10
|
-
# sitting in a crapheap within model.rb).
|
|
11
|
-
#
|
|
12
|
-
# we'll have routines for
|
|
13
|
-
#
|
|
14
|
-
# - dumping/undumping to yaml
|
|
15
|
-
# - dumping/undumping to files that load right into the ics database.
|
|
16
|
-
#
|
|
17
|
-
class IMWObject

  # Builds an instance from a plain hash.
  #
  # For every (attribute, class) pair in +_attr_objlists+ the value
  # stored under the attribute's string name is removed from +hsh+,
  # each element is converted with <tt>class.from_icss</tt>, and the
  # list is stored back under the attribute itself. Pairs in
  # +_attr_objs+ get the same treatment for a single value. The
  # (mutated) hash is then handed to +new+.
  def self.from_icss(hsh)
    # lists of dumpables
    self._attr_objlists.each do |attr, klass|
      raw_list = hsh.delete(attr.to_s)
      next unless raw_list
      hsh[attr] = raw_list.map { |raw| klass.from_icss(raw) }
    end
    # simply dumpable objects
    self._attr_objs.each do |attr, klass|
      raw = hsh.delete(attr.to_s)
      next unless raw
      hsh[attr] = klass.from_icss(raw)
    end
    self.new(hsh)
  end

  # Dump as a plain hash: starts from +instance_values+ and replaces
  # every object-list and object attribute by its +to_icss+ form.
  # A missing object list becomes <tt>[]</tt>; a missing single object
  # is left out.
  def to_icss()
    dumped = instance_values
    # lists of dumpable objects
    self.class._attr_objlists.keys.map(&:to_s).each do |name|
      members = dumped.delete(name) || []
      dumped[name] = members.map { |member| member.to_icss() }
    end
    # simply dumpable objects
    self.class._attr_objs.keys.map(&:to_s).each do |name|
      member = dumped.delete(name)
      dumped[name] = member.to_icss if member
    end
    dumped
  end

  # Pivot from object to relational view.
  #
  # Returns a hash of tables: one entry per child attribute (the
  # children's own +to_csv+ output), one entry per parent/child class
  # pair named by the two class names sorted and joined with "_", and
  # one entry under this object's class name holding a single row.
  def to_csv(parent_id=nil)
    tables   = {}
    sub_ids  = []
    my_class = self.class.to_s

    self.class._attr_objs.sort.each do |attr, klass|
      (tables[attr] ||= []).push(self[attr].to_csv(id))
      join_name = [my_class, klass.to_s].sort.join('_')
      # id and child id are appended as two separate elements
      (tables[join_name] ||= []).push(id, self[attr].id)
      sub_ids.push(self[attr].handle)
    end

    self.class._attr_objlists.sort.each do |attr, klass|
      tables[attr] ||= []
      join_name = [my_class, klass.to_s].sort.join('_')
      tables[join_name] ||= []
      self[attr].each do |child|
        tables[attr].push(child.to_csv(id))
        tables[join_name].push(id, child.id)
        sub_ids.push(child.handle)
      end
    end

    # own row: id, parent id (when given), scalar attributes, child handles
    own_row = [self.id, parent_id].compact +
              slice(self.class._attr_scalars.keys - [:id]) +
              sub_ids
    tables[self.class.to_s] = [own_row]
    tables
  end

end
|
|
82
|
-
|
|
83
|
-
class Note < IMWObject
  # A note as a one-entry hash mapping its handle to its description.
  def to_pair()
    Hash[self.handle, self.desc]
  end

  # The dumped form of a note is its pair.
  def to_icss()
    to_pair
  end

  # Rebuilds a note from +pair+.
  # NOTE(review): relies on Hash.zip and on +pair+ responding to
  # +to_pair+, neither of which is defined in this block -- confirm
  # where they come from.
  def self.from_icss(pair)
    attributes = Hash.zip([:handle,:desc], pair.to_pair)
    self.new attributes
  end
end
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class TagList
  # Builds a tag list from its dumped string by delegating to +from+.
  def self.from_icss(str)
    from(str)
  end

  # The dumped form is the string form.
  def to_icss
    to_s
  end

  # A one-element row holding the string form; +parent_id+ is ignored.
  def to_csv(parent_id=nil)
    [to_s]
  end

  # The handle is the string form.
  def handle
    to_s
  end
end
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
# You acquire the vision of a sharp-eyed tanzier. We'll just assume that's good.
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
# #
|
|
2
|
-
# # views
|
|
3
|
-
# #
|
|
4
|
-
# require 'imw/view/db_infochimps'
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#
|
|
9
|
-
# This is where views of the metadata will go (right now it's all just
|
|
10
|
-
# sitting in a crapheap within model.rb).
|
|
11
|
-
#
|
|
12
|
-
# we'll have routines for
|
|
13
|
-
#
|
|
14
|
-
# - dumping/undumping to yaml
|
|
15
|
-
# - dumping/undumping to files that load right into the ics database.
|
|
16
|
-
#
|
|
17
|
-
class IMWBase

  # Builds an instance from a plain hash.
  #
  # Each has-one attribute found in +hsh+ (looked up by string name)
  # is replaced by <tt>get_attr_class(attr).from_icss(value)</tt>;
  # each "many" attribute is replaced by the list of converted
  # elements. The (mutated) hash is then handed to +new+.
  #
  # NOTE(review): has-one results are stored under the string name
  # while "many" results are stored under the attribute as listed in
  # +_attr_manys+ -- confirm that difference is intended.
  def self.from_icss(hsh)
    # simply dumpable objects
    self._attr_has_one.map(&:to_s).each do |name|
      raw = hsh.delete(name.to_s)
      next unless raw
      hsh[name] = get_attr_class(name).from_icss(raw)
    end
    # lists of dumpables
    self._attr_manys.each do |attr|
      raw_list = hsh.delete(attr.to_s)
      next unless raw_list
      hsh[attr] = raw_list.map { |raw| get_attr_class(attr).from_icss(raw) }
    end
    self.new(hsh)
  end

  # Dump as a plain hash: starts from +instance_values+ and replaces
  # has-one and "many" attributes by their +to_icss+ form.
  #
  # NOTE(review): "many" attributes are looked up without +to_s+; if
  # +_attr_manys+ holds symbols and +instance_values+ uses string
  # keys, every list would come out empty -- confirm.
  def to_icss()
    dumped = instance_values
    # simply dumpable objects
    self.class._attr_has_one.map(&:to_s).each do |name|
      member = dumped.delete(name)
      dumped[name] = member.to_icss if member
    end
    # lists of dumpable objects
    self.class._attr_manys.each do |attr|
      members = dumped.delete(attr) || []
      dumped[attr] = members.map { |member| member.to_icss() }
    end
    dumped
  end

  # Pivot from object to relational view.
  #
  # Returns a hash of tables: one per child attribute, one per
  # parent/child class pair (named by the two class names sorted and
  # joined with "_"), and one under this object's class name.
  def to_csv(parent_id=nil)
    tables   = {}
    sub_ids  = []
    my_class = self.class.to_s

    self.class._attr_has_one.map(&:to_s).sort.each do |name|
      # Banks the object
      child       = self[name]
      child_class = self.class.get_attr_class(name).to_s
      (tables[name] ||= []).push(child.to_csv(id))
      # tie the parent and child together (one [id, child id] pair)
      join_name = [my_class, child_class].sort.join('_')
      (tables[join_name] ||= []).push([id, child.id])
      sub_ids.push(child.handle)
    end

    self.class._attr_manys.sort.each do |attr|
      children = self[attr]
      next unless children
      child_class = self.class.get_attr_class(attr).to_s
      tables[attr] ||= []
      join_name = [my_class, child_class.to_s].sort.join('_')
      tables[join_name] ||= []
      children.each do |child|
        tables[attr].push(child.to_csv(id))
        # here id and child id are appended as two separate elements
        tables[join_name].push(id, child.id)
        sub_ids.push(child.handle)
      end
    end

    own_row = [self.id, parent_id].compact +
              slice(self.class._attr_scalar - [:id]) +
              sub_ids
    column_names = ['id', 'pid'] +
                   (self.class._attr_scalar - [:id]) +
                   self.class._attr_has_one.map(&:to_s).sort
    # NOTE(review): this zips a one-element array (the row) with the
    # column names, yielding [[own_row, 'id']]; zipping the row itself
    # may have been intended -- confirm.
    tables[self.class.to_s] = [own_row].zip(column_names)
    tables
  end

end
|
|
88
|
-
|
|
89
|
-
class Note < IMWBase
  # { :format_name => {}, ... } -- must be a hash
  #
  # A note as a one-entry hash mapping its handle to its description.
  def to_pair()
    Hash[self.handle, self.desc]
  end

  # The dumped form of a note is its pair.
  def to_icss()
    to_pair
  end

  # Rebuilds a note from +pair+.
  # NOTE(review): relies on Hash.zip and on +pair+ responding to
  # +to_pair+, neither of which is defined in this block -- confirm
  # where they come from.
  def self.from_icss(pair)
    attributes = Hash.zip([:handle,:desc], pair.to_pair)
    self.new attributes
  end
end
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
class TagList
  # Builds a tag list from its dumped string by delegating to +from+.
  def self.from_icss(str)
    from(str)
  end

  # The dumped form is the string form.
  def to_icss
    to_s
  end

  # A one-element row holding the string form; +parent_id+ is ignored.
  def to_csv(parent_id=nil)
    [to_s]
  end

  # The handle is the string form.
  def handle
    to_s
  end
end
|
|
116
|
-
|
|
117
|
-
|
data/lib/imw/utils/view.rb
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
class ActiveRecord::Base
  class << self
  end

  # Disabled draft of an association-aware merge, kept for reference:
  #
  #   def merge!(hsh)
  #     hsh = hsh.dup
  #     # puts hsh.to_yaml
  #     # has_many datasets, notes, fields, contributors
  #     self.class.reflect_on_all_associations.each do |ass|
  #       # ["@macro", "@class_name", "@name", "@primary_key_name", "@options",
  #       # "@klass",
  #       # "@through_reflection",
  #       # "@active_record",
  #       puts [ass.name, ass.macro, ass.primary_key_name].to_yaml
  #       if ass.macro == :has_many
  #         els = hsh.delete(ass.name.to_s) || []
  #         puts "!!!!!!!!!!!!!!!!!!!!!!!!!!", els, '!!'
  #         els.each do |el|
  #           puts el
  #           self[ass.name] = ass.klass.new().merge!(el)
  #         end
  #       end
  #       hsh.each do |key,val|
  #         self[key] = val
  #       end
  #       p self
  #       p self.datasets if self.respond_to? 'datasets'
  #     end
  #   end

  # Loads a dumped hash into this record: prints the hash as JSON,
  # assigns every key/value pair through <tt>[]=</tt>, saves with
  # <tt>save!</tt> and returns +self+.
  def undump(hsh)
    puts "unumping from #{hsh.to_json}"
    hsh.each do |key, value|
      self[key] = value
    end
    self.save!
    self
  end
end
|
|
37
|
-
|
|
38
|
-
class Pool < ActiveRecord::Base
  # Loads a dumped pool.
  #
  # For each nested collection the list stored under the collection's
  # string name is removed from +hsh+ (defaulting to an empty list),
  # every element is undumped into a new, saved record of the matching
  # class, and the records are assigned to this pool. Whatever is left
  # in +hsh+ is handled by the inherited +undump+. Returns +self+.
  def undump(hsh)
    nested = {
      :datasets     => Dataset,
      :fields       => Field,
      :contributors => Contributor,
      :pool_notes   => PoolNote
    }
    nested.each do |field, klass|
      vals = hsh.delete(field.to_s) || []
      puts "Undumping #{vals} info #{field}"
      self[field.to_s] = vals.map do |val|
        record = klass.new().undump(val)
        record.save!
        record
      end
    end
    super
    self
  end
end
|
|
50
|
-
|
|
51
|
-
class Dataset < ActiveRecord::Base
  # Loads a dumped dataset.
  #
  # For each nested collection the list stored under the collection's
  # string name is removed from +hsh+ (defaulting to an empty list),
  # every element is undumped into a new, saved record of the matching
  # class, and the records are assigned to this dataset. Whatever is
  # left in +hsh+ is handled by the inherited +undump+; the result is
  # then printed as YAML. Returns +self+.
  def undump(hsh)
    nested = {
      :datasets      => Dataset,
      :fields        => Field,
      :contributors  => Contributor,
      :dataset_notes => DatasetNote
    }
    nested.each do |field, klass|
      vals = hsh.delete(field.to_s) || []
      puts "Undumping #{vals} info #{field}"
      self[field.to_s] = vals.map do |val|
        record = klass.new().undump(val)
        record.save!
        record
      end
    end
    super
    puts "Got Dataset #{self.to_yaml}"
    self
  end
end
|
|
64
|
-
|
|
65
|
-
class IMW < OpenStruct

  #
  # Intended to take an Infochimps Stupid Schema stream and construct
  # the corresponding objects. The method body is currently empty, so
  # nothing is built and +nil+ is returned.
  #
  # The rules the loader is meant to follow:
  #
  # * The schema has the structure
  #
  #     # this has to be first.
  #     - infochimps_schema:
  #         schema_version: 0.2   # in case stuff changes
  #     # then any number of imw objects:
  #     - pool:         (...)
  #         fields: [era, innings_pitched, ...]
  #     - dataset:      (...)
  #         fields:
  #           - name:    Earned Run Average
  #             handle:  era
  #             concept: baseball-era
  #             units:   earned_runs / (9*innings_pitched)
  #     - contributor:  (...)
  #     - field:        (...)
  #
  # * Objects are referred to by __handle__, *NOT* __id__. If an ID is
  #   included, and an object exists with a non-matching ID or handle,
  #   an error will be raised.
  #
  # * Schema files should be maintainable by hand, so the loader tries
  #   to be smart about inline-defined objects: you can either refer
  #   (via handle) to a field defined elsewhere, or define the field in
  #   whole and trust that the Right Thing will happen. This raises the
  #   problem of collisions: when a bulk object update arrives, whom do
  #   we believe -- bulk loader or database? In the absence of
  #   versioning, the object is looked up by its handle. If there is an
  #   existing object, any new information (fields with values in new
  #   that are blank in old) is added to it. If the object is defined
  #   at the top level, it wins; if it is defined as a sub field it
  #   loses.
  #
  # * Every interesting object (Pool, Dataset, Contributor, Field) has
  #   a desc: attribute (for Pool and Dataset it's virtual but never
  #   mind) to describe __itself__. Additionally, every interesting
  #   relationship has its own desc: field.
  #
  def self.undump(schema)
    # compact then merge -- kill off blank
    nil
  end
end
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
2
|
-
require File.join(File.dirname(__FILE__),'../datamapper_spec_helper')
|
|
3
|
-
include IMW
|
|
4
|
-
require 'imw/dataset/datamapper/uri'
|
|
5
|
-
|
|
6
|
-
# The whole group only runs when the spec configuration enables
# DataMapper-backed tests.
if IMW::SpecConfig::TEST_WITH_DATAMAPPER
  IMW::SpecConfig.setup_datamapper_test_db
  describe IMW do

    # Destroy every stored DM_URI before each example.
    before(:each) do
      DM_URI.all.each { |stored| stored.destroy }
    end

    it "makes a URI from a barely complete string" do
      DM_URI.find_or_create_from_url('google.com')
      created = DM_URI.first
      created.should_not be_nil
      created.host.should == 'google.com'
    end

    it "behaves as normalized" do
      DM_URI.find_or_create_from_url('google.com')
      created = DM_URI.first
      created.path.should   == '/'
      created.scheme.should == 'http'
      created.port.should be_nil
    end

    it "makes a complicated URI from a complicated string" do
      DM_URI.find_or_create_from_url('http://me:and@your.mom.com:69/what?orly=yarly&ok=then')
      # NOTE(review): the lookup result is never checked, so this
      # example cannot fail on a wrong record -- add an expectation
      # once the intended field values are confirmed.
      dm_uri = DM_URI.first({
        :scheme => 'http',
        :host   => 'your.mom.com',
        :port   => '69',
        :query  => 'what?orly=yarly&ok=then'
      })
    end

    # it converts to a file path
    # it doesn't leave a trailing / on the file path
    # it escapes unicode URLs
    # it escapes non-URL chars in URL
  end

end
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
require 'imw/dataset/datamapper'
|
|
2
|
-
|
|
3
|
-
module IMW::SpecConfig

  # Points IMW::Dataset at the 'imw_dataset_datamapper_test' database
  # (default connection parameters with +:dbname+ overridden) and then
  # runs <tt>DataMapper.auto_migrate!</tt>.
  def self.setup_datamapper_test_db
    test_db_params = IMW::DEFAULT_DATABASE_CONNECTION_PARAMS.merge({
      :dbname => 'imw_dataset_datamapper_test'
    })
    IMW::Dataset.setup_remote_connection(test_db_params)
    DataMapper.auto_migrate!
  end

end
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/imw/model/files/archive_spec.rb -- module for use in testing various archive formats
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# The <tt>IMW::Files::Archive</tt> module doesn't implement any
|
|
7
|
-
# functionality of its own but merely adds methods to an including
|
|
8
|
-
# class. Appropriately, this spec file implements a shared example
|
|
9
|
-
# group ("an archive of files") which can be included
|
|
10
|
-
# by the spec of an archive class. This spec must also define the
|
|
11
|
-
# following instance variables:
|
|
12
|
-
#
|
|
13
|
-
# <tt>@archive</tt>:: a subclass of <tt>IMW::Files::BasicFile</tt> which
|
|
14
|
-
# has the <tt>IMW::Files::Archive</tt> module mixed in.
|
|
15
|
-
#
|
|
16
|
-
# <tt>@root_directory</tt>: a string specifying the path where all the
|
|
17
|
-
# files will be created
|
|
18
|
-
#
|
|
19
|
-
# <tt>@initial_directory</tt>: a string specifying the path where some
|
|
20
|
-
# files for the initial creation of the archive will be created.
|
|
21
|
-
#
|
|
22
|
-
# <tt>@appending_directory</tt>: a string specifying the path where
|
|
23
|
-
# some files for appending to the archive will be created.
|
|
24
|
-
#
|
|
25
|
-
# <tt>@extraction_directory</tt>: a string specifying the path where
|
|
26
|
-
# the archive's files will be extracted.
|
|
27
|
-
#
|
|
28
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
29
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
30
|
-
# License:: GPL 3.0
|
|
31
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
32
|
-
#
|
|
33
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
34
|
-
require IMW_SPEC_DIR+'/imw/matchers/archive_contents_matcher'
|
|
35
|
-
require IMW_SPEC_DIR+'/imw/matchers/directory_contents_matcher'
|
|
36
|
-
|
|
37
|
-
require 'imw/utils/random'
|
|
38
|
-
require 'imw/utils/extensions/find'
|
|
39
|
-
# Shared examples for any archive object. The including spec supplies
# @archive, @root_directory, @initial_directory, @appending_directory
# and @extraction_directory.
share_examples_for "an archive of files" do
  include Spec::Matchers::IMW

  # Fill the initial and appending directories with random files and
  # create the (empty) extraction directory.
  def create_random_files
    [@initial_directory, @appending_directory].each do |directory|
      IMW::Random.directory_with_files(directory)
    end
    FileUtils.mkdir(@extraction_directory)
  end

  # Remove everything created for an example.
  def delete_random_files
    FileUtils.rm_rf [@root_directory,@extraction_directory]
  end

  before(:each) { create_random_files }

  after(:each) do
    delete_random_files
    FileUtils.rm(@archive.path) if @archive.exist?
  end

  describe "(listing)" do
    it "should raise an error when listing a non-existent archive" do
      listing = lambda { @archive.contents }
      listing.should raise_error(IMW::Error)
    end
  end

  describe "(creation)" do

    it "should be able to create archives which match a directory's structure" do
      @archive.create(@initial_directory + "/*")
      @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
    end

    it "should raise an error if trying to overwrite an archive without the :force option" do
      @archive.create(@initial_directory + "/*")
      second_create = lambda { @archive.create(@initial_directory + "/*") }
      second_create.should raise_error(IMW::Error)
    end

    it "should overwrite an archive if the :force option is given" do
      @archive.create(@initial_directory + "/*")
      @archive.create(@initial_directory + "/*", :force => true)
      @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
    end
  end

  describe "(appending)" do

    it "should append to an archive which already exists" do
      @archive.create(@initial_directory + "/*")
      @archive.append(@appending_directory + "/*")
      expected_sources = [@initial_directory,@appending_directory]
      @archive.should contain_paths_like(expected_sources, :relative_to => @root_directory)
    end

    it "should append to an archive which doesn't already exist" do
      @archive.append(@appending_directory + "/*")
      @archive.should contain_paths_like(@appending_directory, :relative_to => @root_directory)
    end

  end

  describe "(extracting)" do

    it "should raise an error when trying to extract from a non-existing archive" do
      extraction = lambda { @archive.extract }
      extraction.should raise_error(IMW::Error)
    end

    it "should extract files which match the original ones it archived" do
      @archive.create(@initial_directory + "/*")
      @archive.append(@appending_directory + "/*")
      # extract a copy placed inside the extraction directory
      copied_archive = @archive.cp(@extraction_directory + '/' + @archive.basename)
      copied_archive.extract
      @extraction_directory.should contain_files_matching_directory(@root_directory)
    end

  end
end unless defined? IMW_FILES_ARCHIVE_SHARED_SPEC
|
|
117
|
-
|
|
118
|
-
# puts "#{File.basename(__FILE__)}: How many drunken frat boys can fit in an Internet kiosk?" # at bottom
|