imw 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,41 +0,0 @@
1
-
2
- # h2. lib/imw/files/json.rb -- describes json files
3
- #
4
- # == About
5
- #
6
- # A class for working with JSON files.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
- # puts "#{File.basename(__FILE__)}: Yet another clever comment." # at bottom
14
-
15
- require 'json'
16
- require 'imw/files/text'
17
-
18
- module IMW
19
- module Files
20
-
21
- class Json < IMW::Files::Text
22
-
23
- def initialize uri, mode='r', options = {}
24
- super uri, mode
25
- end
26
-
27
- # Return the contents of this JSON file.
28
- #
29
- # FIXME what to do if a block is passed in?
30
- def load &block
31
- JSON.parse File.new(@path).read
32
- end
33
-
34
- # Dump +data+ to this file as JSON.
35
- def dump data
36
- super data.to_json
37
- end
38
- end
39
- FILE_REGEXPS << [/\.json$/, IMW::Files::Json]
40
- end
41
- end
@@ -1,46 +0,0 @@
1
- require 'hpricot'
2
- require 'imw/files/text'
3
- require 'imw/parsers/html_parser'
4
-
5
- module IMW
6
- module Files
7
-
8
- module Sgml
9
-
10
- attr_accessor :doc
11
-
12
- # Delegate to Hpricot
13
- def method_missing method, *args, &block
14
- @doc.send method, *args, &block
15
- end
16
-
17
- # Parse this file using the IMW::Parsers::HtmlParser. The
18
- # parser can either be passed in directly or constructed from a
19
- # passed hash of specs and/or matchers.
20
- def parse *args
21
- parser = args.first.is_a?(IMW::Parsers::HtmlParser) ? args.first : IMW::Parsers::HtmlParser.new(*args)
22
- parser.parse(self)
23
- end
24
-
25
- end
26
-
27
- class Xml < IMW::Files::Text
28
- include Sgml
29
- def initialize uri, mode='r', options={}
30
- super uri, mode, options
31
- @doc = Hpricot.XML(open(uri))
32
- end
33
- end
34
-
35
- class Html < IMW::Files::Text
36
- include Sgml
37
- def initialize uri, mode='r', options={}
38
- super uri, mode, options
39
- @doc = Hpricot(open(uri))
40
- end
41
- end
42
- end
43
- end
44
-
45
-
46
-
@@ -1,68 +0,0 @@
1
- module IMW
2
- module Files
3
-
4
- # Used to process text files when no more specialized class is suitable.
5
- #
6
- # f = IMW::Files::Text.new '/path/to/my_file.dat'
7
- # f.load do |line|
8
- # # ...
9
- # end
10
- #
11
- # Missing methods will be passed to the associated file handle
12
- # (either IO or StringIO depending on whether the URI passed in
13
- # was local or remote) so the usual stuff like read or each_line
14
- # still works.
15
- class Text
16
-
17
- include IMW::Files::BasicFile
18
- include IMW::Files::Compressible
19
-
20
- attr_reader :file, :parser
21
-
22
- def initialize uri, mode='r', options = {}
23
- self.uri= uri
24
- raise IMW::PathError.new("Cannot write to remote file #{uri}") if mode == 'w' && remote?
25
- @file = open(uri, mode)
26
- end
27
-
28
- # Return the contents of this text file as a string.
29
- def load
30
- file.read
31
- end
32
-
33
- # Return an array with each line of this file. If given a
34
- # block, pass each line to the block.
35
- def entries &block
36
- if block_given?
37
- file.each do |line|
38
- yield line.chomp
39
- end
40
- else
41
- file.map do |line|
42
- line.chomp
43
- end
44
- end
45
- end
46
-
47
- # Dump +data+ to this file as a string. Close the file handle
48
- # if passed in :close.
49
- def dump data, options={}
50
- file.write(data.inspect)
51
- file.close if options[:close]
52
- end
53
-
54
- def method_missing method, *args
55
- file.send method, *args
56
- end
57
-
58
- def parse parser_spec, &block
59
- lines = parser_spec.delete(:lines)
60
- @parser = IMW::Parsers::RegexpParser.new(parser_spec)
61
- parser.parse!(file, {:lines => lines}, &block)
62
- end
63
-
64
- end
65
- end
66
- end
67
-
68
- # puts "#{File.basename(__FILE__)}: Don't forget to put a nametag on your Monkeywrench or one of the other chimps might steal it!" # at bottom
@@ -1,46 +0,0 @@
1
- #
2
- # h2. lib/imw/files/yaml.rb -- describes yaml files
3
- #
4
- # == About
5
- #
6
- # A class for working with YAML files.
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
- require 'yaml'
15
- require 'imw/files/text'
16
-
17
- module IMW
18
- module Files
19
-
20
- class Yaml < IMW::Files::Text
21
-
22
- def initialize uri, mode='r', options = {}
23
- super uri, mode
24
- end
25
-
26
- # Return the contents of this YAML file.
27
- #
28
- # FIXME what to do if a block is passed in?
29
- def load &block
30
- YAML.load_file @path
31
- end
32
-
33
- # Dump +data+ to this file as YAML.
34
- def dump data
35
- super data.to_yaml
36
- end
37
-
38
- end
39
-
40
- FILE_REGEXPS << [/\.yaml$/, IMW::Files::Yaml]
41
- FILE_REGEXPS << [/\.yml$/, IMW::Files::Yaml]
42
-
43
- end
44
- end
45
-
46
- # puts "#{File.basename(__FILE__)}: Yet another clever comment." # at bottom
data/lib/imw/files.rb DELETED
@@ -1,125 +0,0 @@
1
- require 'uri'
2
- require 'open-uri'
3
- require 'imw/utils'
4
- require 'imw/files/basicfile'
5
- require 'imw/files/directory'
6
- require 'imw/files/archive'
7
- require 'imw/files/compressible'
8
- require 'imw/files/compressed_file'
9
-
10
- module IMW
11
-
12
- # Parse +path+ and return an appropriate handler. Pass in <tt>:write
13
- # => true</tt> to open for writing.
14
- #
15
- # IMW.open("/tmp/test.csv") # => IMW::Files::Csv("/tmp/test.csv")
16
- #
17
- #
18
- def self.open path, options = {}, &block
19
- if File.directory?(File.expand_path(path))
20
- dir = Files::Directory.new(path)
21
- yield dir if block_given?
22
- dir
23
- else
24
- mode = options[:write] ? 'w' : 'r'
25
- file = Files.file_class_for(path, options).new(path, mode, options)
26
- yield file if block_given?
27
- file
28
- end
29
- end
30
-
31
- def self.open! path, options = {}, &block
32
- self.open path, options.reverse_merge(:write => true)
33
- end
34
-
35
- module Files
36
-
37
-
38
- # There is certainly a cleaner way to do this.
39
- autoload :Text, 'imw/files/text'
40
- autoload :Binary, 'imw/files/binary'
41
- autoload :Yaml, 'imw/files/yaml'
42
- autoload :Csv, 'imw/files/csv'
43
- autoload :Json, 'imw/files/json'
44
- autoload :Bz2, 'imw/files/compressed_files_and_archives'
45
- autoload :Gz, 'imw/files/compressed_files_and_archives'
46
- autoload :Tar, 'imw/files/compressed_files_and_archives'
47
- autoload :Tarbz2, 'imw/files/compressed_files_and_archives'
48
- autoload :Targz, 'imw/files/compressed_files_and_archives'
49
- autoload :Rar, 'imw/files/compressed_files_and_archives'
50
- autoload :Zip, 'imw/files/compressed_files_and_archives'
51
- autoload :Xml, 'imw/files/sgml'
52
- autoload :Html, 'imw/files/sgml'
53
- autoload :Excel, 'imw/files/excel'
54
-
55
-
56
- # An array used to match files to classes to handle them. The
57
- # first element of each array is the regexp and the second names
58
- # the class to handle the file.
59
- #
60
- # IMW::Files::EXTENSION_HANDLERS << [ /\.csv$/, :csv ] #=> IMW::Files::Csv
61
- # IMW::Files::EXTENSION_HANDLERS << [ /\.txt$/, "Text" ] #=> IMW::Files::Text
62
- # IMW::Files::EXTENSION_HANDLERS << [ /\.myclass$/, MyClass ] #=> MyClass
63
- #
64
- # Elements at the end of the array have greater precedence which
65
- # allows, say, <tt>.tar.gz</tt> to be handled differently from
66
- # <tt>.gz</tt>.
67
- EXTENSION_HANDLERS = [
68
- [/\.txt$/, :text],
69
- [/\.txt$/, :text],
70
- [/\.dat$/, :text],
71
- [/\.ascii$/, :text],
72
- [/\.yaml$/, :yaml],
73
- [/\.yml$/, :yaml],
74
- [/\.csv$/, :csv],
75
- [/\.tsv$/, :tsv],
76
- [/\.json$/, :json],
77
- [/\.bz2$/, :bz2],
78
- [/\.gz$/, :gz],
79
- [/\.tar\.bz2$/, :tarbz2],
80
- [/\.tbz2$/, :tarbz2],
81
- [/\.tar\.gz$/, :targz],
82
- [/\.tgz$/, :targz],
83
- [/\.tar$/, :tar],
84
- [/\.rar$/, :rar],
85
- [/\.zip$/, :zip],
86
- [/\.xml$/, :xml],
87
- [/\.html$/, :html],
88
- [/\.htm$/, :html],
89
- [/\.xlsx?$/, :excel]
90
- ]
91
-
92
- SCHEME_HANDLERS = [
93
- [/http/, :html]
94
- ]
95
-
96
- protected
97
- def self.file_class_for path, options = {}
98
- klass = options.delete(:as)
99
-
100
- # try to choose klass from path extension if not already set
101
- unless klass
102
- EXTENSION_HANDLERS.reverse_each do |regexp, thing| # end has greater precedence
103
- next unless regexp =~ path
104
- klass = thing
105
- break
106
- end
107
- end
108
-
109
- # try to choose klass from uri scheme if not already set
110
- unless klass
111
- scheme = URI.parse(path).scheme
112
- SCHEME_HANDLERS.reverse_each do |regexp, thing| # end has greater precedence
113
- next unless regexp =~ scheme
114
- klass = thing
115
- break
116
- end
117
- end
118
-
119
- # just stick with text if still not set
120
- klass = :text unless klass
121
-
122
- klass.is_a?(Class) ? klass : class_eval(klass.to_s.downcase.capitalize)
123
- end
124
- end
125
- end
@@ -1,126 +0,0 @@
1
- module IMW
2
- module Packagers
3
-
4
- # Packages an Array of input files into a single output archive.
5
- # When the archive is extracted, all the input files given will be
6
- # in a single directory with a chosen name. The path to the output
7
- # archive determines both the name of the archive and its type (tar,
8
- # tar.bz2, zip, &c.).
9
- #
10
- # If any of the input files are themselves archives, they will first
11
- # be extracted, with only their contents winding up in the final
12
- # directory (the file hierarchy of the archive will be preserved).
13
- # If any of the input files are compressed, they will first be
14
- # uncompressed before being added to the directory.
15
- #
16
- # Input files can be renamed by passing in a Hash instead of an
17
- # Array. Each key in this hash is the path to an input file and its
18
- # value is the new basename to give it. If the basename is +nil+
19
- # then the original path's basename will be used.
20
- class Archiver
21
-
22
- attr_accessor :name, :inputs
23
-
24
- def initialize name, inputs
25
- @name = name
26
- add_inputs inputs
27
- end
28
-
29
- # FIXME Instead of requiring +new_inputs+ to be either an Array
30
- # or Hash just iterate through whatever it is using +each+ and
31
- # see if each element can be interpreted as a mapping between
32
- # strings.
33
- def add_inputs new_inputs
34
- @inputs ||= {}
35
- if new_inputs.is_a?(Array)
36
- new_inputs.each do |input|
37
- @inputs[File.expand_path(input)] = File.basename(input)
38
- end
39
- else
40
- new_inputs.each_pair do |input, basename|
41
- @inputs[File.expand_path(input)] = (basename || File.basename(input))
42
- end
43
- end
44
- end
45
-
46
- def errors
47
- @errors ||= []
48
- end
49
-
50
- def add_processing_error error
51
- IMW.logger.warn error
52
- errors << error
53
- end
54
-
55
- def success?
56
- errors.empty?
57
- end
58
-
59
- # A temporary directory to work in. Its contents will
60
- # ultimately consist of a directory named for the package
61
- # containing all the input files.
62
- def tmp_dir
63
- @tmp_dir ||= File.join(IMW.path_to(:tmp_root, 'packager'), (Time.now.to_i.to_s + "-" + $$.to_s)) # guaranteed unique on a node
64
- end
65
-
66
- def clean!
67
- FileUtils.rm_rf(tmp_dir)
68
- end
69
-
70
- # A directory which will contain all the content being packaged,
71
- # including the contents of any archives that were included in
72
- # the list of files to process.
73
- def dir
74
- @dir ||= File.join(tmp_dir, name.to_s)
75
- end
76
-
77
- # FIXME This needs to be made idempotent -- calling prepare
78
- # twice should not do any work the second time (unless the user
79
- # is insistent and passes a :force option -- or maybe use bang
80
- # and not-bang versions of the method for this distinction).
81
- def prepare!
82
- FileUtils.mkdir_p dir unless File.exist?(dir)
83
- inputs.each_pair do |path, basename|
84
- new_path = File.join(dir, basename)
85
- file = IMW.open(path, :as => IMW::Files.file_class_for(basename)) # file's original path is meaningless: RackMultipart20091203-958-1nkgc61-0
86
- case
87
- when file.archive?
88
- FileUtils.cd(dir) do
89
- file.extract
90
- end
91
- when file.compressed?
92
- file.cp(new_path).decompress!
93
- else
94
- file.cp(new_path)
95
- end
96
- end
97
- end
98
-
99
- # Package the contents of the temporary directory to an archive
100
- # at +output+ but return exceptions instead of raising them.
101
- def package output, options={}
102
- begin
103
- package! output, options={}
104
- rescue RuntimeError => e
105
- return e
106
- end
107
- end
108
-
109
- # Package the contents of the temporary directory to an archive
110
- # at +output+.
111
- def package! output, options={}
112
- output = IMW.open(output) if output.is_a?(String)
113
- FileUtils.mkdir_p(output.dirname) unless File.exist?(output.dirname)
114
- output.rm! if output.exist?
115
- FileUtils.cd(tmp_dir) do
116
- temp_output = IMW.open(output.basename)
117
- packaged_output = temp_output.create(name.to_s + '/*').mv(output.path)
118
- temp_output.rm if temp_output.exist?
119
- add_processing_error "Archiver: couldn't create archive #{output.path}" unless output.exists?
120
- end
121
- output
122
- end
123
- end
124
- end
125
- end
126
-
@@ -1,36 +0,0 @@
1
- require 'aws/s3'
2
- module IMW
3
- module Packagers
4
- class S3Mover
5
-
6
- attr_reader :last_response
7
- attr_accessor :bucket_name
8
-
9
- def initialize options={}
10
- @bucket_name = options.delete(:bucket_name)
11
- AWS::S3::Base.establish_connection!(options)
12
- end
13
-
14
- def success?
15
- errors.empty?
16
- end
17
-
18
- def success?
19
- last_response && last_response.response.class == Net::HTTPOK
20
- end
21
-
22
- def upload local_path, remote_path
23
- begin
24
- upload! local_path, remote_path
25
- rescue RuntimeError => e
26
- return e
27
- end
28
- end
29
-
30
- def upload! local_path, remote_path
31
- @last_response = AWS::S3::S3Object.store(remote_path, open(local_path), bucket_name)
32
- end
33
-
34
- end
35
- end
36
- end
data/lib/imw/packagers.rb DELETED
@@ -1,8 +0,0 @@
1
- module IMW
2
- module Packagers
3
- autoload :Archiver, 'imw/packagers/archiver'
4
- autoload :S3Mover, 'imw/packagers/s3_mover'
5
- end
6
- end
7
-
8
-
@@ -1,61 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/components.rb -- define separate components of IMW
3
- #
4
- # == About
5
- #
6
- # Defines a hash <tt>IMW::COMPONENTS</tt> which keys component names
7
- # to the files to be required to implement each component and defines
8
- # methods to load these files.
9
- #
10
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
11
- # Copyright:: Copyright (c) 2008 infochimps.org
12
- # License:: GPL 3.0
13
- # Website:: http://infinitemonkeywrench.org/
14
- #
15
- # puts "#{File.basename(__FILE__)}: Something clever" # at bottom
16
-
17
- require 'imw/utils/error'
18
-
19
- module IMW
20
-
21
- # Defines IMW components and the files required by each. Components
22
- # can be accessed using <tt>IMW.load_components</tt> or
23
- # <tt>IMW#imw_components</tt>.
24
- COMPONENTS = {
25
- :datamapper => ["imw/dataset/datamapper","imw/dataset/datamapper/time_and_user_stamps"],
26
- :data_mapper => :datamapper,
27
- :html_parser => "imw/parsers/html_parser",
28
- :flat_file_parser => "imw/parsers/flat_file_parser",
29
- :line_parser => "imw/parsers/line_parser",
30
- :infochimps => ["imw/infochimps/infochimps_resource","imw/infochimps/icss"]
31
- }
32
-
33
- # Load components of IMW as needed,
34
- #
35
- # IMW.load_components :datamapper, :flat_file_parser
36
- def self.load_components *args
37
- args.each do |component_name|
38
- begin
39
- require component_name.to_s
40
- rescue LoadError
41
- component = IMW::COMPONENTS[component_name]
42
- raise IMW::Error.new("#{component_name} is an invalid IMW component. See IMW::COMPONENTS.") unless component
43
- if component.is_a? Array then
44
- IMW.load_components *component
45
- else
46
- IMW.load_components component
47
- end
48
- end
49
- end
50
- end
51
-
52
- # Load components of IMW as needed,
53
- #
54
- # include IMW
55
- # imw_components :datamapper, :flat_file_parser
56
- def imw_components *args
57
- IMW.load_components *args
58
- end
59
-
60
- end
61
-
@@ -1,46 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/config.rb -- configuration parsing
3
- #
4
- # == About
5
- #
6
- # IMW looks for configuration settings in the following places, in
7
- # order of increasing precedence:
8
- #
9
- # 1. Settings defined directly in this file.
10
- #
11
- # 2. From the <tt>etc/imwrc</tt> file in the IMW root directory.
12
- #
13
- # 3. From the <tt>.imwrc</tt> file in the user's home directory (the
14
- # filename can be changed; see
15
- # <tt>IMW::Config::USER_CONFIG_FILE_BASENAME</tt>).
16
- #
17
- # 4. From the file defined by the environment variable +IMWRC+ (the
18
- # value can be changed; see
19
- # <tt>IMW::Config::USER_CONFIG_FILE_ENV_VARIABLE</tt>).
20
- #
21
- # Settings not found in one configuration location will be searched
22
- # for in locations of lesser precedence.
23
- #
24
- # *Note:* configuration files are plain Ruby code that will be directly
25
- # evaluated.
26
- #
27
- # Relevant settings include
28
- #
29
- # * interfaces with external programs (+tar+, +wget+, &c.)
30
- # * paths to directories where IMW reads/writes files
31
- # * correspondences between file extensions and IMW file classes
32
- #
33
- # For more detailed information, see the default configuration file,
34
- # <tt>etc/imwrc</tt>.
35
- #
36
- #
37
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
38
- # Copyright:: Copyright (c) 2008 infochimps.org
39
- # License:: GPL 3.0
40
- # Website:: http://infinitemonkeywrench.org/
41
- #
42
-
43
- module IMW
44
- end
45
-
46
- # puts "#{File.basename(__FILE__)}: You carefully adjust the settings on your Monkeywrench. Glob-monsters: beware!!" # at bottom
@@ -1,8 +0,0 @@
1
- # :nodoc:
2
- # for when cattr_accessor is all you need
3
- #
4
- require 'active_support/core_ext/array/extract_options'
5
- class Array #:nodoc:
6
- include ActiveSupport::CoreExtensions::Array::ExtractOptions
7
- end
8
- require 'active_support/core_ext/class/attribute_accessors'
@@ -1,27 +0,0 @@
1
- require 'imw/utils/extensions/string'
2
- require 'imw/utils/extensions/array'
3
- require 'imw/utils/extensions/hash'
4
- require 'imw/utils/extensions/dir'
5
- require 'imw/utils/extensions/struct'
6
- require 'imw/utils/extensions/symbol'
7
- require 'imw/utils/extensions/file_core'
8
- require 'active_support/core_ext/module/aliasing'
9
- require 'active_support/core_ext/object/blank'
10
- require 'active_support/core_ext/object/misc'
11
- #require 'active_support/core_ext/blank.rb'
12
- require 'imw/utils/extensions/class/attribute_accessors'
13
- # require 'ostruct'
14
- require 'set'
15
-
16
- module IMW
17
- # A replacement for the standard system call which raises an
18
- # IMW::SystemCallError if the command fails as well as printing the
19
- # command appended to the end of <tt>error_message</tt>.
20
- def self.system *commands
21
- command = commands.flatten.join ' '
22
- Kernel.system(command)
23
- raise IMW::SystemCallError.new(command) unless $?.success?
24
- end
25
- end
26
-
27
-
@@ -1,24 +0,0 @@
1
- #
2
- # h2. lib/imw/utils/extensions/dir.rb -- directory extensions
3
- #
4
- # == About
5
- #
6
- # The Ruby +Dir+ module is rubbish. Time to clean it up a bit!
7
- #
8
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
9
- # Copyright:: Copyright (c) 2008 infochimps.org
10
- # License:: GPL 3.0
11
- # Website:: http://infinitemonkeywrench.org/
12
- #
13
-
14
-
15
- class Dir
16
-
17
- # Return the absolute paths of files and directories in the
18
- # directory, leaving out `.' and `..' entries.
19
- def abs_contents
20
- self.entries.map {|entry| File.join(self.path,entry) unless entry == '.' || entry == '..'}.compact
21
- end
22
- end
23
-
24
- # puts "#{File.basename(__FILE__)}: You open the folder and see a long list of names. Some have been crossed out -- ominously..." # at bottom