imw 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,41 +0,0 @@
1
-
2
- # h2. lib/imw/files/json.rb -- describes json files
3
- #
4
- # == About
5
- #
6
- # A class for working with JSON files.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
- # puts "#{File.basename(__FILE__)}: Yet another clever comment." # at bottobm
14
-
15
- require 'json'
16
- require 'imw/files/text'
17
-
18
- module IMW
19
- module Files
20
-
21
- class Json < IMW::Files::Text
22
-
23
- def initialize uri, mode='r', options = {}
24
- super uri, mode
25
- end
26
-
27
- # Return the contents of this JSON file.
28
- #
29
- # FIXME what to do if a block is passed in?
30
- def load &block
31
- JSON.parse File.new(@path).read
32
- end
33
-
34
- # Dump +data+ to this file as JSON.
35
- def dump data
36
- super data.to_json
37
- end
38
- end
39
- FILE_REGEXPS << [/\.json$/, IMW::Files::Json]
40
- end
41
- end
@@ -1,46 +0,0 @@
1
- require 'hpricot'
2
- require 'imw/files/text'
3
- require 'imw/parsers/html_parser'
4
-
5
- module IMW
6
- module Files
7
-
8
- module Sgml
9
-
10
- attr_accessor :doc
11
-
12
- # Delegate to Hpricot
13
- def method_missing method, *args, &block
14
- @doc.send method, *args, &block
15
- end
16
-
17
- # Parse this file using the IMW::Parsers::HtmlParser. The
18
- # parser can either be passed in directly or constructed from a
19
- # passed hash of specs and/or matchers.
20
- def parse *args
21
- parser = args.first.is_a?(IMW::Parsers::HtmlParser) ? args.first : IMW::Parsers::HtmlParser.new(*args)
22
- parser.parse(self)
23
- end
24
-
25
- end
26
-
27
- class Xml < IMW::Files::Text
28
- include Sgml
29
- def initialize uri, mode='r', options={}
30
- super uri, mode, options
31
- @doc = Hpricot.XML(open(uri))
32
- end
33
- end
34
-
35
- class Html < IMW::Files::Text
36
- include Sgml
37
- def initialize uri, mode='r', options={}
38
- super uri, mode, options
39
- @doc = Hpricot(open(uri))
40
- end
41
- end
42
- end
43
- end
44
-
45
-
46
-
@@ -1,68 +0,0 @@
1
- module IMW
2
- module Files
3
-
4
- # Used to process text files when no more specialized class is suitable.
5
- #
6
- # f = IMW::Files::Text.new '/path/to/my_file.dat'
7
- # f.load do |line|
8
- # # ...
9
- # end
10
- #
11
- # Missing methods will be passed to the associated file handle
12
- # (either IO or StringIO depending on whether the URI passed in
13
- # was local or remote) so the usual stuff like read or each_line
14
- # still works.
15
- class Text
16
-
17
- include IMW::Files::BasicFile
18
- include IMW::Files::Compressible
19
-
20
- attr_reader :file, :parser
21
-
22
- def initialize uri, mode='r', options = {}
23
- self.uri= uri
24
- raise IMW::PathError.new("Cannot write to remote file #{uri}") if mode == 'w' && remote?
25
- @file = open(uri, mode)
26
- end
27
-
28
- # Return the contents of this text file as a string.
29
- def load
30
- file.read
31
- end
32
-
33
- # Return an array with each line of this file. If given a
34
- # block, pass each line to the block.
35
- def entries &block
36
- if block_given?
37
- file.each do |line|
38
- yield line.chomp
39
- end
40
- else
41
- file.map do |line|
42
- line.chomp
43
- end
44
- end
45
- end
46
-
47
- # Dump +data+ to this file as a string. Close the file handle
48
- # if passed in :close.
49
- def dump data, options={}
50
- file.write(data.inspect)
51
- file.close if options[:close]
52
- end
53
-
54
- def method_missing method, *args
55
- file.send method, *args
56
- end
57
-
58
- def parse parser_spec, &block
59
- lines = parser_spec.delete(:lines)
60
- @parser = IMW::Parsers::RegexpParser.new(parser_spec)
61
- parser.parse!(file, {:lines => lines}, &block)
62
- end
63
-
64
- end
65
- end
66
- end
67
-
68
- # puts "#{File.basename(__FILE__)}: Don't forget to put a nametag on your Monkeywrench or one of the other chimps might steal it!" # at bottom
@@ -1,46 +0,0 @@
1
- #
2
- # h2. lib/imw/files/yaml.rb -- describes yaml files
3
- #
4
- # == About
5
- #
6
- # A class for working with YAML files.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
- require 'yaml'
15
- require 'imw/files/text'
16
-
17
- module IMW
18
- module Files
19
-
20
- class Yaml < IMW::Files::Text
21
-
22
- def initialize uri, mode='r', options = {}
23
- super uri, mode
24
- end
25
-
26
- # Return the contents of this YAML file.
27
- #
28
- # FIXME what to do if a block is passed in?
29
- def load &block
30
- YAML.load_file @path
31
- end
32
-
33
- # Dump +data+ to this file as YAML.
34
- def dump data
35
- super data.to_yaml
36
- end
37
-
38
- end
39
-
40
- FILE_REGEXPS << [/\.yaml$/, IMW::Files::Yaml]
41
- FILE_REGEXPS << [/\.yml$/, IMW::Files::Yaml]
42
-
43
- end
44
- end
45
-
46
- # puts "#{File.basename(__FILE__)}: Yet another clever comment." # at bottobm
data/lib/imw/files.rb DELETED
@@ -1,125 +0,0 @@
1
- require 'uri'
2
- require 'open-uri'
3
- require 'imw/utils'
4
- require 'imw/files/basicfile'
5
- require 'imw/files/directory'
6
- require 'imw/files/archive'
7
- require 'imw/files/compressible'
8
- require 'imw/files/compressed_file'
9
-
10
- module IMW
11
-
12
- # Parse +path+ and return an appropriate handler. Pass in <tt>:write
13
- # => true</tt> to open for writing.
14
- #
15
- # IMW.open("/tmp/test.csv") # => IMW::Files::Csv("/tmp/test.csv')
16
- #
17
- #
18
- def self.open path, options = {}, &block
19
- if File.directory?(File.expand_path(path))
20
- dir = Files::Directory.new(path)
21
- yield dir if block_given?
22
- dir
23
- else
24
- mode = options[:write] ? 'w' : 'r'
25
- file = Files.file_class_for(path, options).new(path, mode, options)
26
- yield file if block_given?
27
- file
28
- end
29
- end
30
-
31
- def self.open! path, options = {}, &block
32
- self.open path, options.reverse_merge(:write => true)
33
- end
34
-
35
- module Files
36
-
37
-
38
- # There is certainly a cleaner way to do this.
39
- autoload :Text, 'imw/files/text'
40
- autoload :Binary, 'imw/files/binary'
41
- autoload :Yaml, 'imw/files/yaml'
42
- autoload :Csv, 'imw/files/csv'
43
- autoload :Json, 'imw/files/json'
44
- autoload :Bz2, 'imw/files/compressed_files_and_archives'
45
- autoload :Gz, 'imw/files/compressed_files_and_archives'
46
- autoload :Tar, 'imw/files/compressed_files_and_archives'
47
- autoload :Tarbz2, 'imw/files/compressed_files_and_archives'
48
- autoload :Targz, 'imw/files/compressed_files_and_archives'
49
- autoload :Rar, 'imw/files/compressed_files_and_archives'
50
- autoload :Zip, 'imw/files/compressed_files_and_archives'
51
- autoload :Xml, 'imw/files/sgml'
52
- autoload :Html, 'imw/files/sgml'
53
- autoload :Excel, 'imw/files/excel'
54
-
55
-
56
- # An array used to match files to classes to handle them. The
57
- # first element of each array is the regexp and the second names
58
- # the class to handle the file.
59
- #
60
- # IMW::Files::EXTENSION_HANDLERS << [ /\.csv$/, :csv ] #=> IMW::Files::Csv
61
- # IMW::Files::EXTENSION_HANDLERS << [ /\.txt$/, "Text" ] #=> IMW::Files::Text
62
- # IMW::Files::EXTENSION_HANDLERS << [ /\.myclass%/, MyClass ] #=> MyClass
63
- #
64
- # Elements at the end of the array have greater precedence which
65
- # allows, say, <tt>.tar.gz</tt> to be handled differently from
66
- # <tt>.gz</tt>.
67
- EXTENSION_HANDLERS = [
68
- [/\.txt$/, :text],
69
- [/\.txt$/, :text],
70
- [/\.dat$/, :text],
71
- [/\.ascii$/, :text],
72
- [/\.yaml$/, :yaml],
73
- [/\.yml$/, :yaml],
74
- [/\.csv$/, :csv],
75
- [/\.tsv$/, :tsv],
76
- [/\.json$/, :json],
77
- [/\.bz2$/, :bz2],
78
- [/\.gz$/, :gz],
79
- [/\.tar\.bz2$/, :tarbz2],
80
- [/\.tbz2$/, :tarbz2],
81
- [/\.tar\.gz$/, :targz],
82
- [/\.tgz$/, :targz],
83
- [/\.tar$/, :tar],
84
- [/\.rar$/, :rar],
85
- [/\.zip$/, :zip],
86
- [/\.xml$/, :xml],
87
- [/\.html$/, :html],
88
- [/\.htm$/, :html],
89
- [/\.xlsx?$/, :excel]
90
- ]
91
-
92
- SCHEME_HANDLERS = [
93
- [/http/, :html]
94
- ]
95
-
96
- protected
97
- def self.file_class_for path, options = {}
98
- klass = options.delete(:as)
99
-
100
- # try to choose klass from path extension if not already set
101
- unless klass
102
- EXTENSION_HANDLERS.reverse_each do |regexp, thing| # end has greater precedence
103
- next unless regexp =~ path
104
- klass = thing
105
- break
106
- end
107
- end
108
-
109
- # try to choose klass from uri scheme if not already set
110
- unless klass
111
- scheme = URI.parse(path).scheme
112
- SCHEME_HANDLERS.reverse_each do |regexp, thing| # end has greater precedence
113
- next unless regexp =~ scheme
114
- klass = thing
115
- break
116
- end
117
- end
118
-
119
- # just stick with text if still not set
120
- klass = :text unless klass
121
-
122
- klass.is_a?(Class) ? klass : class_eval(klass.to_s.downcase.capitalize)
123
- end
124
- end
125
- end
@@ -1,126 +0,0 @@
1
- module IMW
2
- module Packagers
3
-
4
- # Packages an Array of input files into a single output archive.
5
- # When the archive is extracted, all the input files given will be
6
- # in a single directory with a chosen name. The path to the output
7
- # archive determines both the name of the archive and its type (tar,
8
- # tar.bz2, zip, &c.).
9
- #
10
- # If any of the input files are themselves archives, they will first
11
- # be extracted, with only their contents winding up in the final
12
- # directory (the file hierarchy of the archive will be preserved).
13
- # If any of the input files are compressed, they will first be
14
- # uncompressed before being added to the directory.
15
- #
16
- # Input files can be renamed by passing in a Hash instead of an
17
- # Array. Each key in this hash is the path to an input file and its
18
- # value is the new basename to give it. If the basename is +nil+
19
- # then the original path's basename will be used.
20
- class Archiver
21
-
22
- attr_accessor :name, :inputs
23
-
24
- def initialize name, inputs
25
- @name = name
26
- add_inputs inputs
27
- end
28
-
29
- # FIXME Instead of requiring +new_inputs+ to be either an Array
30
- # or Hash just iterate through whatever it is using +each+ and
31
- # see if the iterate can be interpreted as a mapping between
32
- # strings.
33
- def add_inputs new_inputs
34
- @inputs ||= {}
35
- if new_inputs.is_a?(Array)
36
- new_inputs.each do |input|
37
- @inputs[File.expand_path(input)] = File.basename(input)
38
- end
39
- else
40
- new_inputs.each_pair do |input, basename|
41
- @inputs[File.expand_path(input)] = (basename || File.basename(input))
42
- end
43
- end
44
- end
45
-
46
- def errors
47
- @errors ||= []
48
- end
49
-
50
- def add_processing_error error
51
- IMW.logger.warn error
52
- errors << error
53
- end
54
-
55
- def success?
56
- errors.empty?
57
- end
58
-
59
- # A temporary directory to work in. Its contents will
60
- # ultimately consist of a directory named for the package
61
- # containing all the input files.
62
- def tmp_dir
63
- @tmp_dir ||= File.join(IMW.path_to(:tmp_root, 'packager'), (Time.now.to_i.to_s + "-" + $$.to_s)) # guaranteed unique on a node
64
- end
65
-
66
- def clean!
67
- FileUtils.rm_rf(tmp_dir)
68
- end
69
-
70
- # A directory which will contain all the content being packaged,
71
- # including the contents of any archives that were included in
72
- # the list of files to process.
73
- def dir
74
- @dir ||= File.join(tmp_dir, name.to_s)
75
- end
76
-
77
- # FIXME This needs to be made idempotent -- calling prepare
78
- # twice should not do any work the second time (unless the user
79
- # is insistent and passes a :force option -- or maybe use bang
80
- # and not-bang versions of the method for this distinction).
81
- def prepare!
82
- FileUtils.mkdir_p dir unless File.exist?(dir)
83
- inputs.each_pair do |path, basename|
84
- new_path = File.join(dir, basename)
85
- file = IMW.open(path, :as => IMW::Files.file_class_for(basename)) # file's original path is meaningless: RackMultipart20091203-958-1nkgc61-0
86
- case
87
- when file.archive?
88
- FileUtils.cd(dir) do
89
- file.extract
90
- end
91
- when file.compressed?
92
- file.cp(new_path).decompress!
93
- else
94
- file.cp(new_path)
95
- end
96
- end
97
- end
98
-
99
- # Package the contents of the temporary directory to an archive
100
- # at +output+ but return exceptions instead of raising them.
101
- def package output, options={}
102
- begin
103
- package! output, options={}
104
- rescue RuntimeError => e
105
- return e
106
- end
107
- end
108
-
109
- # Package the contents of the temporary directory to an archive
110
- # at +output+.
111
- def package! output, options={}
112
- output = IMW.open(output) if output.is_a?(String)
113
- FileUtils.mkdir_p(output.dirname) unless File.exist?(output.dirname)
114
- output.rm! if output.exist?
115
- FileUtils.cd(tmp_dir) do
116
- temp_output = IMW.open(output.basename)
117
- packaged_output = temp_output.create(name.to_s + '/*').mv(output.path)
118
- temp_output.rm if temp_output.exist?
119
- add_processing_error "Archiver: couldn't create archive #{output.path}" unless output.exists?
120
- end
121
- output
122
- end
123
- end
124
- end
125
- end
126
-
@@ -1,36 +0,0 @@
1
- require 'aws/s3'
2
- module IMW
3
- module Packagers
4
- class S3Mover
5
-
6
- attr_reader :last_response
7
- attr_accessor :bucket_name
8
-
9
- def initialize options={}
10
- @bucket_name = options.delete(:bucket_name)
11
- AWS::S3::Base.establish_connection!(options)
12
- end
13
-
14
- def success?
15
- errors.empty?
16
- end
17
-
18
- def success?
19
- last_response && last_response.response.class == Net::HTTPOK
20
- end
21
-
22
- def upload local_path, remote_path
23
- begin
24
- upload! local_path, remote_path
25
- rescue RuntimeError => e
26
- return e
27
- end
28
- end
29
-
30
- def upload! local_path, remote_path
31
- @last_response = AWS::S3::S3Object.store(remote_path, open(local_path), bucket_name)
32
- end
33
-
34
- end
35
- end
36
- end
data/lib/imw/packagers.rb DELETED
@@ -1,8 +0,0 @@
1
- module IMW
2
- module Packagers
3
- autoload :Archiver, 'imw/packagers/archiver'
4
- autoload :S3Mover, 'imw/packagers/s3_mover'
5
- end
6
- end
7
-
8
-
@@ -1,61 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/components.rb -- define separate components of IMW
3
- #
4
- # == About
5
- #
6
- # Defines a hash <tt>IMW::COMPONENTS</tt> which keys component names
7
- # to the files to be required to implement each component and defines
8
- # methods to load these files.
9
- #
10
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
11
- # Copyright:: Copyright (c) 2008 infochimps.org
12
- # License:: GPL 3.0
13
- # Website:: http://infinitemonkeywrench.org/
14
- #
15
- # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
16
-
17
- require 'imw/utils/error'
18
-
19
- module IMW
20
-
21
- # Defines IMW components and the files required by each. Components
22
- # can be accessed using <tt>IMW.load_components</tt> or
23
- # <tt>IMW#imw_components</tt>.
24
- COMPONENTS = {
25
- :datamapper => ["imw/dataset/datamapper","imw/dataset/datamapper/time_and_user_stamps"],
26
- :data_mapper => :datamapper,
27
- :html_parser => "imw/parsers/html_parser",
28
- :flat_file_parser => "imw/parsers/flat_file_parser",
29
- :line_parser => "imw/parsers/line_parser",
30
- :infochimps => ["imw/infochimps/infochimps_resource","imw/infochimps/icss"]
31
- }
32
-
33
- # Load components of IMW as needed,
34
- #
35
- # IMW.load_components :datamapper, :flat_file_parser
36
- def self.load_components *args
37
- args.each do |component_name|
38
- begin
39
- require component_name.to_s
40
- rescue LoadError
41
- component = IMW::COMPONENTS[component_name]
42
- raise IMW::Error.new("#{component_name} is an invalid IMW component. See IMW::COMPONENTS.") unless component
43
- if component.is_a? Array then
44
- IMW.load_components *component
45
- else
46
- IMW.load_components component
47
- end
48
- end
49
- end
50
- end
51
-
52
- # Load components of IMW as needed,
53
- #
54
- # include IMW
55
- # imw_components :datamapper, :flat_file_parser
56
- def imw_components *args
57
- IMW.load_components *args
58
- end
59
-
60
- end
61
-
@@ -1,46 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/config.rb -- configuration parsing
3
- #
4
- # == About
5
- #
6
- # IMW looks for configuration settings in the following places, in
7
- # order of increasing precedence:
8
- #
9
- # 1. Settings defined directly in this file.
10
- #
11
- # 2. From the <tt>etc/imwrc</tt> file in the IMW root directory.
12
- #
13
- # 3. From the <tt>.imwrc</tt> file in the user's home directory (the
14
- # filename can be changed; see
15
- # <tt>IMW::Config::USER_CONFIG_FILE_BASENAME</tt>).
16
- #
17
- # 4. From the file defined by the environment variable +IMWRC+ (the
18
- # value can be changed; see
19
- # <tt>IMW::Config::USER_CONFIG_FILE_ENV_VARIABLE</tt>
20
- #
21
- # Settings not found in one configuration location will be searched
22
- # for in locations of lesser precedence.
23
- #
24
- # *Note:* configuration files are plain Ruby code that will be directly
25
- # evaluated.
26
- #
27
- # Relevant settings include
28
- #
29
- # * interfaces with external programs (+tar+, +wget+, &c.)
30
- # * paths to directories where IMW reads/writes files
31
- # * correspondences between file extensions and IMW file classes
32
- #
33
- # For more detailed information, see the default configuration file,
34
- # <tt>etc/imwrc</tt>.
35
- #
36
- #
37
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
38
- # Copyright:: Copyright (c) 2008 infochimps.org
39
- # License:: GPL 3.0
40
- # Website:: http://infinitemonkeywrench.org/
41
- #
42
-
43
- module IMW
44
- end
45
-
46
- # puts "#{File.basename(__FILE__)}: You carefully adjust the settings on your Monkeywrench. Glob-monsters: beware!!" # at bottom
@@ -1,8 +0,0 @@
1
- # :nodoc:
2
- # for when cattr_accessor is all you need
3
- #
4
- require 'active_support/core_ext/array/extract_options'
5
- class Array #:nodoc:
6
- include ActiveSupport::CoreExtensions::Array::ExtractOptions
7
- end
8
- require 'active_support/core_ext/class/attribute_accessors'
@@ -1,27 +0,0 @@
1
- require 'imw/utils/extensions/string'
2
- require 'imw/utils/extensions/array'
3
- require 'imw/utils/extensions/hash'
4
- require 'imw/utils/extensions/dir'
5
- require 'imw/utils/extensions/struct'
6
- require 'imw/utils/extensions/symbol'
7
- require 'imw/utils/extensions/file_core'
8
- require 'active_support/core_ext/module/aliasing'
9
- require 'active_support/core_ext/object/blank'
10
- require 'active_support/core_ext/object/misc'
11
- #require 'active_support/core_ext/blank.rb'
12
- require 'imw/utils/extensions/class/attribute_accessors'
13
- # require 'ostruct'
14
- require 'set'
15
-
16
- module IMW
17
- # A replacement for the standard system call which raises an
18
- # IMW::SystemCallError if the command fails as well as printing the
19
- # command appended to the end of <tt>error_message</tt>.
20
- def self.system *commands
21
- command = commands.flatten.join ' '
22
- Kernel.system(command)
23
- raise IMW::SystemCallError.new(command) unless $?.success?
24
- end
25
- end
26
-
27
-
@@ -1,24 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/dir.rb -- directory extensions
3
- #
4
- # == About
5
- #
6
- # The Ruby +Dir+ module is rubbish. Time to clean it up a bit!
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
-
15
- class Dir
16
-
17
- # Return the absolute paths of files and directories in the
18
- # directory, leaving out `.' and `..' entries.
19
- def abs_contents
20
- self.entries.map {|entry| File.join(self.path,entry) unless entry == '.' || entry == '..'}.compact
21
- end
22
- end
23
-
24
- # puts "#{File.basename(__FILE__)}: You open the folder and see along list of names. Some have been crossed out -- ominously..." # at bottom