imw 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,64 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/file.rb -- extensions to built-in file class
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
-
12
- require 'imw/utils/error'
13
- require 'imw/utils/config'
14
- require 'imw/utils/extensions/string'
15
-
16
- class File
17
-
18
- # Returns the name of the path given:
19
- #
20
- # File.name_of_file("/path/to/somefile.txt") => "somefile".
21
- def self.name_of_file path
22
- basename(path)[0,basename(path).length - extname(path).length]
23
- end
24
-
25
- # Returns what would be the handle of a source or dataset
26
- # described by a file at +path+:
27
- #
28
- # File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
29
- def self.handle path
30
- File.basename(path).split('.').first.handle
31
- end
32
-
33
- # Returns a unique (non-existing) version of the given +path+ by
34
- # appending successive integers, useful for copying files into
35
- # directories without clobbering existing files (a la <tt>wget
36
- # -nc</tt>).
37
- #
38
- # In a directory <tt>/path/to</tt> without a file named
39
- # <tt>data.txt</tt>
40
- #
41
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
42
- #
43
- # If <tt>data.txt</tt> were to already exist in that directory, then
44
- #
45
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
46
- #
47
- # If <tt>data.txt.1</tt> were to already exist then
48
- #
49
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
50
- #
51
- # and so on.
52
- def self.uniquify path
53
- orig_path = path.clone
54
- copy_number = 1
55
- while exist? path do
56
- path = orig_path + ".#{copy_number}"
57
- copy_number += 1
58
- end
59
- path
60
- end
61
-
62
- end
63
-
64
- # puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
@@ -1,22 +0,0 @@
1
- #
2
- # A struct
3
- # but has an idea of what type attributes should be
4
- #
5
- #
6
- class TypedStruct < Struct
7
- def self.new attrs, convs
8
- struct = super *attrs
9
- struct_attr_convs = Hash.zip(attrs, convs).reject{|a,t| t.nil? }
10
- struct.class_eval do
11
- cattr_accessor :attr_convs
12
- self.attr_convs = struct_attr_convs
13
- def remap!
14
- attr_convs.each do |attr, conv|
15
- curr = self.send(attr)
16
- self.send("#{attr}=", curr.send(conv)) if curr.respond_to?(conv)
17
- end
18
- end
19
- end # class_eval
20
- struct
21
- end
22
- end
@@ -1,59 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/uri.rb -- extensions to uri module
3
- #
4
- # == About
5
- #
6
- # Some useful extensions to the +URI+ module.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
- require 'uri'
15
-
16
- module URI
17
-
18
- # List of prefixes ignored when returning domains (or reversed
19
- # domains).
20
- IGNORED_PREFIXES = ['www']
21
-
22
- # Returns the domain of the given URI, first scrubbing it of any
23
- # prefixes we can ignore.
24
- def self.domain(uri)
25
- uriobj = self.parse(uri)
26
- if uriobj.host then
27
- host = uriobj.host
28
- elsif uriobj.path then
29
- host = uriobj.path.split('/')[0]
30
- else
31
- raise ArgumentError, "Invalid URI: #{uri}"
32
- end
33
- # remove any ignored prefixes from the hostname (i.e. - 'www')
34
- parts = host.split('.')
35
- parts = (IGNORED_PREFIXES.member?(parts[0]) ? parts[1...parts.size] : parts)
36
- host = parts.join('.')
37
- host
38
- end
39
-
40
- # Returns the reversed domain of the given URI, first scrubbing it of
41
- # any prefixes we can ignore. Will not reverse numeric addresses of
42
- # the form 127.0.0.1
43
- def self.reverse_domain(uri)
44
- begin
45
- d = self.domain(uri)
46
- # check for numeric ip
47
- # in a TERRIBLE way that needs to be fixed!
48
- if d=~/^[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*$/ then
49
- return d
50
- else
51
- return d.split('.').reverse.join('.')
52
- end
53
- rescue URI::InvalidURIError,ArgumentError
54
- raise $!
55
- end
56
- end
57
- end
58
-
59
- # puts "#{File.basename(__FILE__)}: In the end, it's either you or I." # at bottom
@@ -1,112 +0,0 @@
1
- # #
2
- # # views
3
- # #
4
- # require 'imw/view/db_infochimps'
5
- #
6
-
7
-
8
- #
9
- # This is where views of the metadata will go (right now it's all just
10
- # sitting in a crapheap within model.rb).
11
- #
12
- # we'll have routines for
13
- #
14
- # - dumping/undumping to yaml
15
- # - dumping/undumping to files that load right into the ics database.
16
- #
17
- class IMWObject
18
-
19
- def self.from_icss(hsh)
20
- # lists of dumpables
21
- self._attr_objlists.each do |attr, cl|
22
- if (vals = hsh.delete(attr.to_s))
23
- hsh[attr] = vals.map{ |val| cl.from_icss(val) }
24
- end
25
- end
26
- # simply dumpable objects
27
- self._attr_objs.each do |attr, cl|
28
- if (val = hsh.delete(attr.to_s))
29
- hsh[attr] = cl.from_icss(val)
30
- end
31
- end
32
- self.new(hsh)
33
- end
34
-
35
- # Dump as a plain hash
36
- def to_icss()
37
- hsh = instance_values
38
- # lists of dumpable objects
39
- self.class._attr_objlists.keys.map(&:to_s).each do |attr|
40
- hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
41
- end
42
- # simply dumpable objects
43
- self.class._attr_objs.keys.map(&:to_s).each do |attr|
44
- (v=hsh.delete attr) && hsh[attr] = v.to_icss
45
- end
46
- hsh
47
- end
48
-
49
- # Pivot from object to relational view
50
- def to_csv(parent_id=nil)
51
- tables = {}
52
- sub_ids = []
53
- my_cl = self.class.to_s
54
- self.class._attr_objs.sort.each do |attr, cl|
55
- tables[attr] ||= [] ; tables[attr].push(self[attr].to_csv(id))
56
- join = "%s_%s" % [my_cl, cl.to_s].sort
57
- tables[join] ||= [] ; tables[join].push(id, self[attr].id)
58
- sub_ids.push self[attr].handle
59
- end
60
-
61
- self.class._attr_objlists.sort.each do |attr, cl|
62
- tables[attr] ||= []
63
- join = "%s_%s" % [my_cl, cl.to_s].sort
64
- tables[join] ||= []
65
- self[attr].each do |obj|
66
- tables[attr].push(obj.to_csv(id))
67
- tables[join].push(id, obj.id)
68
- sub_ids.push obj.handle
69
- end
70
- end
71
-
72
-
73
- tables[self.class.to_s] = [
74
- [self.id, parent_id].compact +
75
- slice(self.class._attr_scalars.keys - [:id]) +
76
- sub_ids
77
- ]
78
- tables
79
- end
80
-
81
- end
82
-
83
- class Note < IMWObject
84
- def to_pair()
85
- { self.handle => self.desc }
86
- end
87
- def to_icss()
88
- to_pair
89
- end
90
- def self.from_icss(pair)
91
- self.new Hash.zip([:handle,:desc], pair.to_pair)
92
- end
93
- end
94
-
95
-
96
- class TagList
97
- def self.from_icss(str)
98
- self.from(str)
99
- end
100
- def to_icss()
101
- self.to_s
102
- end
103
- def to_csv(parent_id=nil)
104
- [self.to_s]
105
- end
106
-
107
- def handle() to_s end
108
- end
109
-
110
-
111
-
112
- # You acquire the vision of a sharp-eyed tanzier. We'll just assume that's good.
@@ -1,117 +0,0 @@
1
- # #
2
- # # views
3
- # #
4
- # require 'imw/view/db_infochimps'
5
- #
6
-
7
-
8
- #
9
- # This is where views of the metadata will go (right now it's all just
10
- # sitting in a crapheap within model.rb).
11
- #
12
- # we'll have routines for
13
- #
14
- # - dumping/undumping to yaml
15
- # - dumping/undumping to files that load right into the ics database.
16
- #
17
- class IMWBase
18
-
19
- def self.from_icss(hsh)
20
- # simply dumpable objects
21
- self._attr_has_one.map(&:to_s).each do |attr|
22
- if (val = hsh.delete(attr.to_s))
23
- hsh[attr] = get_attr_class(attr).from_icss(val)
24
- end
25
- end
26
- # lists of dumpables
27
- self._attr_manys.each do |attr|
28
- if (vals = hsh.delete(attr.to_s))
29
- hsh[attr] = vals.map{ |val| get_attr_class(attr).from_icss(val) }
30
- end
31
- end
32
- self.new(hsh)
33
- end
34
-
35
- # Dump as a plain hash
36
- def to_icss()
37
- hsh = instance_values
38
- # simply dumpable objects
39
- self.class._attr_has_one.map(&:to_s).each do |attr|
40
- (v=hsh.delete attr) && hsh[attr] = v.to_icss
41
- end
42
- # lists of dumpable objects
43
- self.class._attr_manys.each do |attr|
44
- hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
45
- end
46
- hsh
47
- end
48
-
49
- # Pivot from object to relational view
50
- def to_csv(parent_id=nil)
51
- tables = {}
52
- sub_ids = []
53
- my_cl = self.class.to_s
54
- self.class._attr_has_one.map(&:to_s).sort.each do |attr|
55
- # Banks the object
56
- obj = self[attr]
57
- cl = self.class.get_attr_class(attr).to_s
58
- tables[attr] ||= [] ; tables[attr].push( obj.to_csv(id) )
59
- # tie the parent and child together
60
- join = "%s_%s" % [my_cl, cl].sort
61
- tables[join] ||= [] ; tables[join].push( [id, obj.id] )
62
- sub_ids.push obj.handle
63
- end
64
-
65
- self.class._attr_manys.sort.each do |attr|
66
- objs = self[attr] or next
67
- cl = self.class.get_attr_class(attr).to_s
68
- tables[attr] ||= []
69
- join = "%s_%s" % [my_cl, cl.to_s].sort
70
- tables[join] ||= []
71
- objs.each do |obj|
72
- tables[attr].push(obj.to_csv(id))
73
- tables[join].push(id, obj.id)
74
- sub_ids.push obj.handle
75
- end
76
- end
77
-
78
-
79
- tables[self.class.to_s] = [
80
- [self.id, parent_id].compact +
81
- slice(self.class._attr_scalar - [:id]) +
82
- sub_ids
83
- ].zip(['id', 'pid']+(self.class._attr_scalar - [:id])+self.class._attr_has_one.map(&:to_s).sort)
84
- tables
85
- end
86
-
87
- end
88
-
89
- class Note < IMWBase
90
- # { :format_name => {}, ... } -- must be a hash
91
- def to_pair()
92
- { self.handle => self.desc }
93
- end
94
- def to_icss()
95
- to_pair
96
- end
97
- def self.from_icss(pair)
98
- self.new Hash.zip([:handle,:desc], pair.to_pair)
99
- end
100
- end
101
-
102
-
103
- class TagList
104
- def self.from_icss(str)
105
- self.from(str)
106
- end
107
- def to_icss()
108
- self.to_s
109
- end
110
- def to_csv(parent_id=nil)
111
- [self.to_s]
112
- end
113
-
114
- def handle() to_s end
115
- end
116
-
117
-
@@ -1,113 +0,0 @@
1
-
2
- class ActiveRecord::Base
3
- class << self
4
- end
5
- # def merge!(hsh)
6
- # hsh = hsh.dup
7
- # # puts hsh.to_yaml
8
- # # has_many datasets, notes, fields, contributors
9
- # self.class.reflect_on_all_associations.each do |ass|
10
- # # ["@macro", "@class_name", "@name", "@primary_key_name", "@options",
11
- # # "@klass",
12
- # # "@through_reflection",
13
- # # "@active_record",
14
- # puts [ass.name, ass.macro, ass.primary_key_name].to_yaml
15
- # if ass.macro == :has_many
16
- # els = hsh.delete(ass.name.to_s) || []
17
- # puts "!!!!!!!!!!!!!!!!!!!!!!!!!!", els, '!!'
18
- # els.each do |el|
19
- # puts el
20
- # self[ass.name] = ass.klass.new().merge!(el)
21
- # end
22
- # end
23
- # hsh.each do |key,val|
24
- # self[key] = val
25
- # end
26
- # p self
27
- # p self.datasets if self.respond_to? 'datasets'
28
- # end
29
- # end
30
- def undump(hsh)
31
- puts "unumping from #{hsh.to_json}"
32
- hsh.each{ |k,v| self[k] = v }
33
- self.save!
34
- self
35
- end
36
- end
37
-
38
- class Pool < ActiveRecord::Base
39
- def undump(hsh)
40
- { :datasets => Dataset, :fields => Field,
41
- :contributors => Contributor, :pool_notes => PoolNote }.each do |field, klass|
42
- vals = hsh.delete(field.to_s) || []
43
- puts "Undumping #{vals} info #{field}"
44
- self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
45
- end
46
- super
47
- self
48
- end
49
- end
50
-
51
- class Dataset < ActiveRecord::Base
52
- def undump(hsh)
53
- { :datasets => Dataset, :fields => Field,
54
- :contributors => Contributor, :dataset_notes => DatasetNote }.each do |field, klass|
55
- vals = hsh.delete(field.to_s) || []
56
- puts "Undumping #{vals} info #{field}"
57
- self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
58
- end
59
- super
60
- puts "Got Dataset #{self.to_yaml}"
61
- self
62
- end
63
- end
64
-
65
- class IMW < OpenStruct
66
-
67
- #
68
- # Takes an Infochimps Stupid Schema stream and
69
- # constructs the corresponding objects.
70
- #
71
- # Here are the rules:
72
- # * the schema has the structure
73
- # # this has to be first.
74
- # - infochimps_schema:
75
- # schema_version: 0.2 # in case stuff changes
76
- # # then any number of imw objects:
77
- # - pool: (...)
78
- # fields: [era, innings_pitched,
79
- # - dataset: (...)
80
- # fields:
81
- # - name: Earned Run Average
82
- # handle: era
83
- # concept: baseball-era
84
- # units: earned_runs / (9*innings_pitched)
85
- # - contributor: (...)
86
- # - field: (...)
87
- #
88
- # * Objects are referred to by __handle__, *NOT* __id__. If an ID is
89
- # included, and an object exists with a non-matching ID or handle,
90
- # an error will be raised.
91
- #
92
- # * We want to make the schema files maintainable by hand, which means that
93
- # the loader tries to be smart about inline-defined objects. That is, you
94
- # can either refer to (via handle) a field defined elsewhere, or you can
95
- # define the field in whole, and trust that the Right Thing will
96
- # happen. This presents the problem of collisions, though. If a bulk object
97
- # update arrives, we need to know whom to believe -- bulk loader or
98
- # database. In the absence of versioning: we look up the object by its
99
- # handle. If there's an existing object, any new information (fields with
100
- # values in new that are blank in old) is added to it. If the object is
101
- # defined at the top level, it wins; if the object is defined as a sub field
102
- # it loses.
103
- #
104
- # * Every interesting object (Pool, Dataset, Contributor, Field) has a desc:
105
- # attribute (for Pool and Dataset it's virtual but never mind) to describe
106
- # __itself__. Additionally, every interesting relationship has its own desc: field.
107
- #
108
-
109
- def self.undump(schema)
110
-
111
- # compact then merge -- kill off blank
112
- end
113
- end
@@ -1,43 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
2
- require File.join(File.dirname(__FILE__),'../datamapper_spec_helper')
3
- include IMW
4
- require 'imw/dataset/datamapper/uri'
5
-
6
- if IMW::SpecConfig::TEST_WITH_DATAMAPPER
7
- IMW::SpecConfig.setup_datamapper_test_db
8
- describe IMW do
9
-
10
- before(:each) do
11
- DM_URI.all.each do |u| u.destroy end
12
- end
13
-
14
- it "makes a URI from a barely complete string" do
15
- DM_URI.find_or_create_from_url('google.com')
16
- u = DM_URI.first
17
- u.should_not be_nil
18
- u.host.should == 'google.com'
19
- end
20
-
21
- it "behaves as normalized" do
22
- DM_URI.find_or_create_from_url('google.com')
23
- u = DM_URI.first
24
- u.path.should == '/'
25
- u.scheme.should == 'http'
26
- u.port.should be_nil
27
- end
28
-
29
- it "makes a complicated URI from a complicated string" do
30
- DM_URI.find_or_create_from_url('http://me:and@your.mom.com:69/what?orly=yarly&ok=then')
31
- dm_uri = DM_URI.first({
32
- :scheme => 'http', :host => 'your.mom.com', :port => '69',
33
- :query => 'what?orly=yarly&ok=then'
34
- })
35
- end
36
-
37
- # it converts to a file path
38
- # it doesn't leave a trailing / on the file path
39
- # it escapes unicode URLs
40
- # it escapes non-URL chars in URL
41
- end
42
-
43
- end
@@ -1,11 +0,0 @@
1
- require 'imw/dataset/datamapper'
2
-
3
- module IMW::SpecConfig
4
-
5
- def self.setup_datamapper_test_db
6
- IMW::Dataset.setup_remote_connection IMW::DEFAULT_DATABASE_CONNECTION_PARAMS.merge({
7
- :dbname => 'imw_dataset_datamapper_test' })
8
- DataMapper.auto_migrate!
9
- end
10
-
11
- end
@@ -1,118 +0,0 @@
1
- #
2
- # h2. spec/imw/model/files/archive_spec.rb -- module for use in testing various archive formats
3
- #
4
- # == About
5
- #
6
- # The <tt>IMW::Files::Archive</tt> module doesn't implement any
7
- # functionality of its own but merely adds methods to an including
8
- # class. Appropriately, this spec file implements a shared example
9
- # group ("an archive of files") which can be included
10
- # by the spec of an archive class. This spec must also define the
11
- # following instance variables:
12
- #
13
- # <tt>@archive</tt>:: a subclass of <tt>IMW::Files::BasicFile</tt> which
14
- # has the <tt>IMW::Files::Archive</tt> module mixed in.
15
- #
16
- # <tt>@root_directory</tt>: a string specifying the path where all the
17
- # files will be created
18
- #
19
- # <tt>@initial_directory</tt>: a string specifying the path where some
20
- # files for the initial creation of the archive will be created.
21
- #
22
- # <tt>@appending_directory</tt>: a string specifying the path where
23
- # some files for appending to the archive will be created.
24
- #
25
- # <tt>@extraction_directory</tt>: a string specifying the path where
26
- # the archive's files will be extracted.
27
- #
28
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
29
- # Copyright:: Copyright (c) 2008 infochimps.org
30
- # License:: GPL 3.0
31
- # Website:: http://infinitemonkeywrench.org/
32
- #
33
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
34
- require IMW_SPEC_DIR+'/imw/matchers/archive_contents_matcher'
35
- require IMW_SPEC_DIR+'/imw/matchers/directory_contents_matcher'
36
-
37
- require 'imw/utils/random'
38
- require 'imw/utils/extensions/find'
39
- share_examples_for "an archive of files" do
40
- include Spec::Matchers::IMW
41
-
42
- def create_random_files
43
- IMW::Random.directory_with_files(@initial_directory)
44
- IMW::Random.directory_with_files(@appending_directory)
45
- FileUtils.mkdir(@extraction_directory)
46
- end
47
-
48
- def delete_random_files
49
- FileUtils.rm_rf [@root_directory,@extraction_directory]
50
- end
51
-
52
- before(:each) do
53
- create_random_files
54
- end
55
-
56
- after(:each) do
57
- delete_random_files
58
- FileUtils.rm(@archive.path) if @archive.exist?
59
- end
60
-
61
- describe "(listing)" do
62
- it "should raise an error when listing a non-existent archive" do
63
- lambda { @archive.contents }.should raise_error(IMW::Error)
64
- end
65
- end
66
-
67
- describe "(creation)" do
68
-
69
- it "should be able to create archives which match a directory's structure" do
70
- @archive.create(@initial_directory + "/*")
71
- @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
72
- end
73
-
74
- it "should raise an error if trying to overwrite an archive without the :force option" do
75
- @archive.create(@initial_directory + "/*")
76
- lambda { @archive.create(@initial_directory + "/*") }.should raise_error(IMW::Error)
77
- end
78
-
79
- it "should overwrite an archive if the :force option is given" do
80
- @archive.create(@initial_directory + "/*")
81
- @archive.create(@initial_directory + "/*", :force => true)
82
- @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
83
- end
84
- end
85
-
86
- describe "(appending)" do
87
-
88
- it "should append to an archive which already exists" do
89
- @archive.create(@initial_directory + "/*")
90
- @archive.append(@appending_directory + "/*")
91
- @archive.should contain_paths_like([@initial_directory,@appending_directory], :relative_to => @root_directory)
92
- end
93
-
94
- it "should append to an archive which doesn't already exist" do
95
- @archive.append(@appending_directory + "/*")
96
- @archive.should contain_paths_like(@appending_directory, :relative_to => @root_directory)
97
- end
98
-
99
- end
100
-
101
- describe "(extracting)" do
102
-
103
- it "should raise an error when trying to extract from a non-existing archive" do
104
- lambda { @archive.extract }.should raise_error(IMW::Error)
105
- end
106
-
107
- it "should extract files which match the original ones it archived" do
108
- @archive.create(@initial_directory + "/*")
109
- @archive.append(@appending_directory + "/*")
110
- new_archive = @archive.cp(@extraction_directory + '/' + @archive.basename)
111
- new_archive.extract
112
- @extraction_directory.should contain_files_matching_directory(@root_directory)
113
- end
114
-
115
- end
116
- end unless defined? IMW_FILES_ARCHIVE_SHARED_SPEC
117
-
118
- # puts "#{File.basename(__FILE__)}: How many drunken frat boys can fit in an Internet kiosk?" # at bottom