imw 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,64 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/file.rb -- extensions to built-in file class
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
-
12
- require 'imw/utils/error'
13
- require 'imw/utils/config'
14
- require 'imw/utils/extensions/string'
15
-
16
- class File
17
-
18
- # Returns the name of the path given:
19
- #
20
- # File.name_of_file("/path/to/somefile.txt") => "somefile".
21
- def self.name_of_file path
22
- basename(path)[0,basename(path).length - extname(path).length]
23
- end
24
-
25
- # Returns what would be the handle of a source or dataset
26
- # described by a file at +path+:
27
- #
28
- # File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
29
- def self.handle path
30
- File.basename(path).split('.').first.handle
31
- end
32
-
33
- # Returns a unique (non-existing) version of the given +path+ by
34
- # appending successive integers, useful for copying files into
35
- # directories without clobbering existing files (a la <tt>wget
36
- # -nc</tt>).
37
- #
38
- # In a directory <tt>/path/to</tt> without a file named
39
- # <tt>data.txt</tt>
40
- #
41
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
42
- #
43
- # If <tt>data.txt</tt> were to already exist in that directory, then
44
- #
45
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
46
- #
47
- # If <tt>data.txt.1</tt> were to already exist then
48
- #
49
- # File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
50
- #
51
- # and so on.
52
- def self.uniquify path
53
- orig_path = path.clone
54
- copy_number = 1
55
- while exist? path do
56
- path = orig_path + ".#{copy_number}"
57
- copy_number += 1
58
- end
59
- path
60
- end
61
-
62
- end
63
-
64
- # puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
@@ -1,22 +0,0 @@
1
- #
2
- # A struct
3
- # but has an idea of what type attributes should be
4
- #
5
- #
6
- class TypedStruct < Struct
7
- def self.new attrs, convs
8
- struct = super *attrs
9
- struct_attr_convs = Hash.zip(attrs, convs).reject{|a,t| t.nil? }
10
- struct.class_eval do
11
- cattr_accessor :attr_convs
12
- self.attr_convs = struct_attr_convs
13
- def remap!
14
- attr_convs.each do |attr, conv|
15
- curr = self.send(attr)
16
- self.send("#{attr}=", curr.send(conv)) if curr.respond_to?(conv)
17
- end
18
- end
19
- end # class_eval
20
- struct
21
- end
22
- end
@@ -1,59 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/uri.rb -- extensions to uri module
3
- #
4
- # == About
5
- #
6
- # Some useful extensions to the +URI+ module.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
- require 'uri'
15
-
16
- module URI
17
-
18
- # List of prefixes ignored when returning domains (or reversed
19
- # domains).
20
- IGNORED_PREFIXES = ['www']
21
-
22
- # Returns the domain of the given URI, first scrubbing it of any
23
- # prefixes we can ignore.
24
- def self.domain(uri)
25
- uriobj = self.parse(uri)
26
- if uriobj.host then
27
- host = uriobj.host
28
- elsif uriobj.path then
29
- host = uriobj.path.split('/')[0]
30
- else
31
- raise ArgumentError, "Invalid URI: #{uri}"
32
- end
33
- # remove any ignored prefixes from the hostname (i.e. - 'www')
34
- parts = host.split('.')
35
- parts = (IGNORED_PREFIXES.member?(parts[0]) ? parts[1...parts.size] : parts)
36
- host = parts.join('.')
37
- host
38
- end
39
-
40
- # Returns the reversed domain of the given URI, first scrubbing it of
41
- # any prefixes we can ignore. Will not reverse numeric addresses of
42
- # the form 127.0.0.1
43
- def self.reverse_domain(uri)
44
- begin
45
- d = self.domain(uri)
46
- # check for numeric ip
47
- # in a TERRIBLE way that needs to be fixed!
48
- if d=~/^[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*$/ then
49
- return d
50
- else
51
- return d.split('.').reverse.join('.')
52
- end
53
- rescue URI::InvalidURIError,ArgumentError
54
- raise $!
55
- end
56
- end
57
- end
58
-
59
- # puts "#{File.basename(__FILE__)}: In the end, it's either you or I." # at bottom
@@ -1,112 +0,0 @@
1
- # #
2
- # # views
3
- # #
4
- # require 'imw/view/db_infochimps'
5
- #
6
-
7
-
8
- #
9
- # This is where views of the metadata will go (right now it's all just
10
- # sitting in a crapheap within model.rb).
11
- #
12
- # we'll have routines for
13
- #
14
- # - dumping/undumping to yaml
15
- # - dumping/undumping to files that load right into the ics database.
16
- #
17
- class IMWObject
18
-
19
- def self.from_icss(hsh)
20
- # lists of dumpables
21
- self._attr_objlists.each do |attr, cl|
22
- if (vals = hsh.delete(attr.to_s))
23
- hsh[attr] = vals.map{ |val| cl.from_icss(val) }
24
- end
25
- end
26
- # simply dumpable objects
27
- self._attr_objs.each do |attr, cl|
28
- if (val = hsh.delete(attr.to_s))
29
- hsh[attr] = cl.from_icss(val)
30
- end
31
- end
32
- self.new(hsh)
33
- end
34
-
35
- # Dump as a plain hash
36
- def to_icss()
37
- hsh = instance_values
38
- # lists of dumpable objects
39
- self.class._attr_objlists.keys.map(&:to_s).each do |attr|
40
- hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
41
- end
42
- # simply dumpable objects
43
- self.class._attr_objs.keys.map(&:to_s).each do |attr|
44
- (v=hsh.delete attr) && hsh[attr] = v.to_icss
45
- end
46
- hsh
47
- end
48
-
49
- # Pivot from object to relational view
50
- def to_csv(parent_id=nil)
51
- tables = {}
52
- sub_ids = []
53
- my_cl = self.class.to_s
54
- self.class._attr_objs.sort.each do |attr, cl|
55
- tables[attr] ||= [] ; tables[attr].push(self[attr].to_csv(id))
56
- join = "%s_%s" % [my_cl, cl.to_s].sort
57
- tables[join] ||= [] ; tables[join].push(id, self[attr].id)
58
- sub_ids.push self[attr].handle
59
- end
60
-
61
- self.class._attr_objlists.sort.each do |attr, cl|
62
- tables[attr] ||= []
63
- join = "%s_%s" % [my_cl, cl.to_s].sort
64
- tables[join] ||= []
65
- self[attr].each do |obj|
66
- tables[attr].push(obj.to_csv(id))
67
- tables[join].push(id, obj.id)
68
- sub_ids.push obj.handle
69
- end
70
- end
71
-
72
-
73
- tables[self.class.to_s] = [
74
- [self.id, parent_id].compact +
75
- slice(self.class._attr_scalars.keys - [:id]) +
76
- sub_ids
77
- ]
78
- tables
79
- end
80
-
81
- end
82
-
83
- class Note < IMWObject
84
- def to_pair()
85
- { self.handle => self.desc }
86
- end
87
- def to_icss()
88
- to_pair
89
- end
90
- def self.from_icss(pair)
91
- self.new Hash.zip([:handle,:desc], pair.to_pair)
92
- end
93
- end
94
-
95
-
96
- class TagList
97
- def self.from_icss(str)
98
- self.from(str)
99
- end
100
- def to_icss()
101
- self.to_s
102
- end
103
- def to_csv(parent_id=nil)
104
- [self.to_s]
105
- end
106
-
107
- def handle() to_s end
108
- end
109
-
110
-
111
-
112
- # You acquire the vision of a sharp-eyed tanzier. We'll just assume that's good.
@@ -1,117 +0,0 @@
1
- # #
2
- # # views
3
- # #
4
- # require 'imw/view/db_infochimps'
5
- #
6
-
7
-
8
- #
9
- # This is where views of the metadata will go (right now it's all just
10
- # sitting in a crapheap within model.rb).
11
- #
12
- # we'll have routines for
13
- #
14
- # - dumping/undumping to yaml
15
- # - dumping/undumping to files that load right into the ics database.
16
- #
17
- class IMWBase
18
-
19
- def self.from_icss(hsh)
20
- # simply dumpable objects
21
- self._attr_has_one.map(&:to_s).each do |attr|
22
- if (val = hsh.delete(attr.to_s))
23
- hsh[attr] = get_attr_class(attr).from_icss(val)
24
- end
25
- end
26
- # lists of dumpables
27
- self._attr_manys.each do |attr|
28
- if (vals = hsh.delete(attr.to_s))
29
- hsh[attr] = vals.map{ |val| get_attr_class(attr).from_icss(val) }
30
- end
31
- end
32
- self.new(hsh)
33
- end
34
-
35
- # Dump as a plain hash
36
- def to_icss()
37
- hsh = instance_values
38
- # simply dumpable objects
39
- self.class._attr_has_one.map(&:to_s).each do |attr|
40
- (v=hsh.delete attr) && hsh[attr] = v.to_icss
41
- end
42
- # lists of dumpable objects
43
- self.class._attr_manys.each do |attr|
44
- hsh[attr] = (hsh.delete(attr)||[]).map{ |a| a.to_icss() }
45
- end
46
- hsh
47
- end
48
-
49
- # Pivot from object to relational view
50
- def to_csv(parent_id=nil)
51
- tables = {}
52
- sub_ids = []
53
- my_cl = self.class.to_s
54
- self.class._attr_has_one.map(&:to_s).sort.each do |attr|
55
- # Banks the object
56
- obj = self[attr]
57
- cl = self.class.get_attr_class(attr).to_s
58
- tables[attr] ||= [] ; tables[attr].push( obj.to_csv(id) )
59
- # tie the parent and child together
60
- join = "%s_%s" % [my_cl, cl].sort
61
- tables[join] ||= [] ; tables[join].push( [id, obj.id] )
62
- sub_ids.push obj.handle
63
- end
64
-
65
- self.class._attr_manys.sort.each do |attr|
66
- objs = self[attr] or next
67
- cl = self.class.get_attr_class(attr).to_s
68
- tables[attr] ||= []
69
- join = "%s_%s" % [my_cl, cl.to_s].sort
70
- tables[join] ||= []
71
- objs.each do |obj|
72
- tables[attr].push(obj.to_csv(id))
73
- tables[join].push(id, obj.id)
74
- sub_ids.push obj.handle
75
- end
76
- end
77
-
78
-
79
- tables[self.class.to_s] = [
80
- [self.id, parent_id].compact +
81
- slice(self.class._attr_scalar - [:id]) +
82
- sub_ids
83
- ].zip(['id', 'pid']+(self.class._attr_scalar - [:id])+self.class._attr_has_one.map(&:to_s).sort)
84
- tables
85
- end
86
-
87
- end
88
-
89
- class Note < IMWBase
90
- # { :format_name => {}, ... } -- must be a hash
91
- def to_pair()
92
- { self.handle => self.desc }
93
- end
94
- def to_icss()
95
- to_pair
96
- end
97
- def self.from_icss(pair)
98
- self.new Hash.zip([:handle,:desc], pair.to_pair)
99
- end
100
- end
101
-
102
-
103
- class TagList
104
- def self.from_icss(str)
105
- self.from(str)
106
- end
107
- def to_icss()
108
- self.to_s
109
- end
110
- def to_csv(parent_id=nil)
111
- [self.to_s]
112
- end
113
-
114
- def handle() to_s end
115
- end
116
-
117
-
@@ -1,113 +0,0 @@
1
-
2
- class ActiveRecord::Base
3
- class << self
4
- end
5
- # def merge!(hsh)
6
- # hsh = hsh.dup
7
- # # puts hsh.to_yaml
8
- # # has_many datasets, notes, fields, contributors
9
- # self.class.reflect_on_all_associations.each do |ass|
10
- # # ["@macro", "@class_name", "@name", "@primary_key_name", "@options",
11
- # # "@klass",
12
- # # "@through_reflection",
13
- # # "@active_record",
14
- # puts [ass.name, ass.macro, ass.primary_key_name].to_yaml
15
- # if ass.macro == :has_many
16
- # els = hsh.delete(ass.name.to_s) || []
17
- # puts "!!!!!!!!!!!!!!!!!!!!!!!!!!", els, '!!'
18
- # els.each do |el|
19
- # puts el
20
- # self[ass.name] = ass.klass.new().merge!(el)
21
- # end
22
- # end
23
- # hsh.each do |key,val|
24
- # self[key] = val
25
- # end
26
- # p self
27
- # p self.datasets if self.respond_to? 'datasets'
28
- # end
29
- # end
30
- def undump(hsh)
31
- puts "unumping from #{hsh.to_json}"
32
- hsh.each{ |k,v| self[k] = v }
33
- self.save!
34
- self
35
- end
36
- end
37
-
38
- class Pool < ActiveRecord::Base
39
- def undump(hsh)
40
- { :datasets => Dataset, :fields => Field,
41
- :contributors => Contributor, :pool_notes => PoolNote }.each do |field, klass|
42
- vals = hsh.delete(field.to_s) || []
43
- puts "Undumping #{vals} info #{field}"
44
- self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
45
- end
46
- super
47
- self
48
- end
49
- end
50
-
51
- class Dataset < ActiveRecord::Base
52
- def undump(hsh)
53
- { :datasets => Dataset, :fields => Field,
54
- :contributors => Contributor, :dataset_notes => DatasetNote }.each do |field, klass|
55
- vals = hsh.delete(field.to_s) || []
56
- puts "Undumping #{vals} info #{field}"
57
- self[field.to_s] = vals.map{|val| f = klass.new().undump(val); f.save!; f}
58
- end
59
- super
60
- puts "Got Dataset #{self.to_yaml}"
61
- self
62
- end
63
- end
64
-
65
- class IMW < OpenStruct
66
-
67
- #
68
- # Takes an Infochimps Stupid Schema stream and
69
- # constructs the corresponding objects.
70
- #
71
- # Here are the rules:
72
- # * the schema has the structure
73
- # # this has to be first.
74
- # - infochimps_schema:
75
- # schema_version: 0.2 # in case stuff changes
76
- # # then any number of imw objects:
77
- # - pool: (...)
78
- # fields: [era, innings_pitched,
79
- # - dataset: (...)
80
- # fields:
81
- # - name: Earned Run Average
82
- # handle: era
83
- # concept: baseball-era
84
- # units: earned_runs / (9*innings_pitched)
85
- # - contributor: (...)
86
- # - field: (...)
87
- #
88
- # * Objects are referred to by __handle__, *NOT* __id__. If an ID is
89
- # included, and an object exists with a non-matching ID or handle,
90
- # an error will be raised.
91
- #
92
- # * We want to make the schema files maintainable by hand, which means that
93
- # the loader tries to be smart about inline-defined objects. That is, you
94
- # can either refer to (via handle) a field defined elsewhere, or you can
95
- # define the field in whole, and trust that the Right Thing will
96
- # happen. This presents the problem of collisions, though. If a bulk object
97
- # update arrives, we need to know whom to believe -- bulk loader or
98
- # database. In the absence of versioning: we look up the object by its
99
- # handle. If there's an existing object, any new information (fields with
100
- # values in new that are blank in old) is added to it. If the object is
101
- # defined at the top level, it wins; if the object is defined as a sub field
102
- # it loses.
103
- #
104
- # * Every interesting object (Pool, Dataset, Contributor, Field) has a desc:
105
- # attribute (for Pool and Dataset it's virtual but never mind) to describe
106
- # __itself__. Additionally, every interesting relationship has its own desc: field.
107
- #
108
-
109
- def self.undump(schema)
110
-
111
- # compact then merge -- kill off blank
112
- end
113
- end
@@ -1,43 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
2
- require File.join(File.dirname(__FILE__),'../datamapper_spec_helper')
3
- include IMW
4
- require 'imw/dataset/datamapper/uri'
5
-
6
- if IMW::SpecConfig::TEST_WITH_DATAMAPPER
7
- IMW::SpecConfig.setup_datamapper_test_db
8
- describe IMW do
9
-
10
- before(:each) do
11
- DM_URI.all.each do |u| u.destroy end
12
- end
13
-
14
- it "makes a URI from a barely complete string" do
15
- DM_URI.find_or_create_from_url('google.com')
16
- u = DM_URI.first
17
- u.should_not be_nil
18
- u.host.should == 'google.com'
19
- end
20
-
21
- it "behaves as normalized" do
22
- DM_URI.find_or_create_from_url('google.com')
23
- u = DM_URI.first
24
- u.path.should == '/'
25
- u.scheme.should == 'http'
26
- u.port.should be_nil
27
- end
28
-
29
- it "makes a complicated URI from a complicated string" do
30
- DM_URI.find_or_create_from_url('http://me:and@your.mom.com:69/what?orly=yarly&ok=then')
31
- dm_uri = DM_URI.first({
32
- :scheme => 'http', :host => 'your.mom.com', :port => '69',
33
- :query => 'what?orly=yarly&ok=then'
34
- })
35
- end
36
-
37
- # it converts to a file path
38
- # it doesn't leave a trailing / on the file path
39
- # it escapes unicode URLs
40
- # it escapes non-URL chars in URL
41
- end
42
-
43
- end
@@ -1,11 +0,0 @@
1
- require 'imw/dataset/datamapper'
2
-
3
- module IMW::SpecConfig
4
-
5
- def self.setup_datamapper_test_db
6
- IMW::Dataset.setup_remote_connection IMW::DEFAULT_DATABASE_CONNECTION_PARAMS.merge({
7
- :dbname => 'imw_dataset_datamapper_test' })
8
- DataMapper.auto_migrate!
9
- end
10
-
11
- end
@@ -1,118 +0,0 @@
1
- #
2
- # h2. spec/imw/model/files/archive_spec.rb -- module for use in testing various archive formats
3
- #
4
- # == About
5
- #
6
- # The <tt>IMW::Files::Archive</tt> module doesn't implement any
7
- # functionality of its own but merely adds methods to an including
8
- # class. Appropriately, this spec file implements a shared example
9
- # group ("an archive of files") which can be included
10
- # by the spec of an archive class. This spec must also define the
11
- # following instance variables:
12
- #
13
- # <tt>@archive</tt>:: a subclass of <tt>IMW::Files::BasicFile</tt> which
14
- # has the <tt>IMW::Files::Archive</tt> module mixed in.
15
- #
16
- # <tt>@root_directory</tt>: a string specifying the path where all the
17
- # files will be created
18
- #
19
- # <tt>@initial_directory</tt>: a string specifying the path where some
20
- # files for the initial creation of the archive will be created.
21
- #
22
- # <tt>@appending_directory</tt>: a string specifying the path where
23
- # some files for appending to the archive will be created.
24
- #
25
- # <tt>@extraction_directory</tt>: a string specifying the path where
26
- # the archive's files will be extracted.
27
- #
28
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
29
- # Copyright:: Copyright (c) 2008 infochimps.org
30
- # License:: GPL 3.0
31
- # Website:: http://infinitemonkeywrench.org/
32
- #
33
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
34
- require IMW_SPEC_DIR+'/imw/matchers/archive_contents_matcher'
35
- require IMW_SPEC_DIR+'/imw/matchers/directory_contents_matcher'
36
-
37
- require 'imw/utils/random'
38
- require 'imw/utils/extensions/find'
39
- share_examples_for "an archive of files" do
40
- include Spec::Matchers::IMW
41
-
42
- def create_random_files
43
- IMW::Random.directory_with_files(@initial_directory)
44
- IMW::Random.directory_with_files(@appending_directory)
45
- FileUtils.mkdir(@extraction_directory)
46
- end
47
-
48
- def delete_random_files
49
- FileUtils.rm_rf [@root_directory,@extraction_directory]
50
- end
51
-
52
- before(:each) do
53
- create_random_files
54
- end
55
-
56
- after(:each) do
57
- delete_random_files
58
- FileUtils.rm(@archive.path) if @archive.exist?
59
- end
60
-
61
- describe "(listing)" do
62
- it "should raise an error when listing a non-existent archive" do
63
- lambda { @archive.contents }.should raise_error(IMW::Error)
64
- end
65
- end
66
-
67
- describe "(creation)" do
68
-
69
- it "should be able to create archives which match a directory's structure" do
70
- @archive.create(@initial_directory + "/*")
71
- @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
72
- end
73
-
74
- it "should raise an error if trying to overwrite an archive without the :force option" do
75
- @archive.create(@initial_directory + "/*")
76
- lambda { @archive.create(@initial_directory + "/*") }.should raise_error(IMW::Error)
77
- end
78
-
79
- it "should overwrite an archive if the :force option is given" do
80
- @archive.create(@initial_directory + "/*")
81
- @archive.create(@initial_directory + "/*", :force => true)
82
- @archive.should contain_paths_like(@initial_directory, :relative_to => @root_directory)
83
- end
84
- end
85
-
86
- describe "(appending)" do
87
-
88
- it "should append to an archive which already exists" do
89
- @archive.create(@initial_directory + "/*")
90
- @archive.append(@appending_directory + "/*")
91
- @archive.should contain_paths_like([@initial_directory,@appending_directory], :relative_to => @root_directory)
92
- end
93
-
94
- it "should append to an archive which doesn't already exist" do
95
- @archive.append(@appending_directory + "/*")
96
- @archive.should contain_paths_like(@appending_directory, :relative_to => @root_directory)
97
- end
98
-
99
- end
100
-
101
- describe "(extracting)" do
102
-
103
- it "should raise an error when trying to extract from a non-existing archive" do
104
- lambda { @archive.extract }.should raise_error(IMW::Error)
105
- end
106
-
107
- it "should extract files which match the original ones it archived" do
108
- @archive.create(@initial_directory + "/*")
109
- @archive.append(@appending_directory + "/*")
110
- new_archive = @archive.cp(@extraction_directory + '/' + @archive.basename)
111
- new_archive.extract
112
- @extraction_directory.should contain_files_matching_directory(@root_directory)
113
- end
114
-
115
- end
116
- end unless defined? IMW_FILES_ARCHIVE_SHARED_SPEC
117
-
118
- # puts "#{File.basename(__FILE__)}: How many drunken frat boys can fit in an Internet kiosk?" # at bottom