imw 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -0,0 +1,18 @@
module IMW
  module Resources
    module CompressedFiles

      # Mixin for gzip-compressed (<tt>.gz</tt>) resources.  It pulls
      # in IMW::Resources::CompressedFile and supplies the program and
      # flags that module's decompression methods look up.
      module Gz

        include IMW::Resources::CompressedFile

        # Settings consulted when decompressing this resource:
        # <tt>:decompression_program</tt> names the command-line
        # program and <tt>:decompress</tt> gives the flags passed to it.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :decompression_program => :gunzip,
            :decompress            => '-fd'
          }
        end

      end
    end
  end
end
@@ -0,0 +1,23 @@
require 'imw/resources/archive'

module IMW
  module Resources
    module Archives

      # Mixin for RAR archives.  It includes IMW::Resources::Archive
      # and supplies the +rar+ program name and per-operation
      # arguments through +archive_settings+.
      module Rar

        include IMW::Resources::Archive

        # Program and arguments used for each archive operation
        # (<tt>:create</tt>, <tt>:append</tt>, <tt>:list</tt>,
        # <tt>:extract</tt>).
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :program => :rar,
            :create  => %w[a -o+ -inul],
            :append  => %w[a -o+ -inul],
            :list    => "vb",
            :extract => %w[x -o+ -inul]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
require 'imw/resources/archive'

module IMW
  module Resources
    module Archives

      # Mixin for plain (uncompressed) tar archives.  It includes
      # IMW::Resources::Archive and supplies the +tar+ program name
      # and per-operation flags through +archive_settings+.
      module Tar

        include IMW::Resources::Archive

        # Program and flags used for each archive operation
        # (<tt>:create</tt>, <tt>:append</tt>, <tt>:list</tt>,
        # <tt>:extract</tt>).
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :create  => "-cf",
            :append  => "-rf",
            :list    => "-tf",
            :extract => "-xf",
            :program => :tar
          }
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
require 'imw/resources/archive'
require 'imw/resources/compressed_file'

module IMW
  module Resources
    module Archives

      # Mixin for bzip2-compressed tar archives (<tt>.tar.bz2</tt> and
      # <tt>.tbz2</tt>).  Such a resource is both a compressed file and
      # an archive, so both IMW::Resources::CompressedFile and
      # IMW::Resources::Archive are included.
      module Tarbz2

        #
        # It's a compressed file
        #

        include IMW::Resources::CompressedFile

        # Program, flags and extension used to (de)compress the tar
        # file wrapped by this resource.
        #
        # @return [Hash]
        def compression_settings
          @compression_settings ||= {
            :program               => :bzip2,
            :decompression_program => :bunzip2,
            :decompress            => '',
            :extension             => 'bz2'
          }
        end

        #
        # But it's also an archive
        #

        # NOTE: included after CompressedFile on purpose -- the include
        # order decides which module wins for methods both define.
        include IMW::Resources::Archive

        # Program and flags used for each archive operation.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program => :tar,
            :create  => '-cf',
            :list    => "-tjf",
            :extract => "-xjf"
          }
        end

        # Overrides the default behavior of
        # IMW::Resources::Archive#create: first build a plain tar file
        # from +input_paths+, then compress it.
        #
        # The intermediate tar resource is told explicitly to use this
        # module's bzip2 settings (as Targz#create does with gzip)
        # instead of relying on IMW::COMPRESSION_SETTINGS happening to
        # be bzip2; that constant can be overridden by the user.
        #
        # @param [Array<String>] input_paths paths to add (may be nested arrays)
        # @return [IMW::Resource] the compressed archive
        def create *input_paths
          tar_path = path_between_archive_and_compression
          IMW.system(archive_settings[:program], archive_settings[:create], tar_path, *input_paths.flatten)
          tar = IMW.open(tar_path)
          # Keep the empty :compress flag the default settings supplied
          # so the bzip2 invocation itself is unchanged.
          tar.compression_settings = { :compress => '' }.merge(compression_settings)
          tar.compress!
        end

        # The basename of the tar file left behind after decompressing.
        #
        # @return [String]
        def decompressed_basename
          case extname
          when '.tar.bz2' then basename[0..-5]                 # .tar.bz2 => .tar
          when '.tbz2'    then basename.gsub(/tbz2$/, 'tar')   # .tbz2    => .tar
          else                 basename[0..-(extname.size + 1)]
          end
        end

        protected

        # Path of the intermediate, uncompressed tar file written by
        # +create+ before compression.
        #
        # @return [String]
        def path_between_archive_and_compression
          File.join(dirname, name + '.tar')
        end

        public

        #
        # It's a compressed file AND an archive!
        #

        # The extension of this resource, treating the compound
        # <tt>.tar.bz2</tt> as a single extension.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.bz2$/ then '.tar.bz2'
          when /\.tbz2$/     then '.tbz2'
          else                    File.extname(path)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,78 @@
require 'imw/resources/archive'
require 'imw/resources/compressed_file'

module IMW
  module Resources
    module Archives

      # Mixin for gzip-compressed tar archives (<tt>.tar.gz</tt> and
      # <tt>.tgz</tt>).  Such a resource is both a compressed file and
      # an archive, so it includes both IMW::Resources::CompressedFile
      # and IMW::Resources::Archive.
      module Targz

        #
        # The compressed-file half
        #

        include IMW::Resources::CompressedFile

        # Program, flags and extension used to (de)compress the tar
        # file wrapped by this resource.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings
          @compression_settings = {
            :program               => :gzip,
            :decompression_program => :gunzip,
            :decompress            => '',
            :extension             => 'gz'
          }
        end

        #
        # The archive half
        #

        include IMW::Resources::Archive

        # Program and flags used for each archive operation.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings
          @archive_settings = {
            :program => :tar,
            :list    => "-tzf",
            :create  => '-cf',
            :extract => "-xzf"
          }
        end

        # Replaces the inherited +create+: build a plain tar file from
        # +input_paths+, then compress it using this resource's
        # +compression_settings+.
        #
        # @param [Array<String>] input_paths paths to add (may be nested arrays)
        # @return [IMW::Resource] the compressed archive
        def create *input_paths
          tar_program = archive_settings[:program]
          tar_flags   = archive_settings[:create].split
          tar_path    = path_between_archive_and_compression
          IMW.system(tar_program, tar_flags, tar_path, *input_paths.flatten)

          plain_tar = IMW.open(tar_path)
          plain_tar.compression_settings = compression_settings
          plain_tar.compress!
        end

        # The basename of the tar file left behind after decompressing.
        #
        # @return [String]
        def decompressed_basename
          extension = extname
          return basename[0..-4]                if extension == '.tar.gz' # .tar.gz => .tar
          return basename.gsub(/tgz$/, 'tar')   if extension == '.tgz'    # .tgz    => .tar
          basename[0..-(extension.size + 1)]
        end

        # Path of the intermediate, uncompressed tar file written by
        # +create+ before compression.
        #
        # @return [String]
        def path_between_archive_and_compression
          File.join(dirname, name + '.tar')
        end
        protected :path_between_archive_and_compression

        #
        # Both halves at once
        #

        # The extension of this resource, treating the compound
        # <tt>.tar.gz</tt> as a single extension.
        #
        # @return [String]
        def extname
          location = path
          if    /\.tar\.gz$/ === location then '.tar.gz'
          elsif /\.tgz$/     === location then '.tgz'
          else  File.extname(location)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,57 @@
require 'imw/resources/archive'

module IMW
  module Resources
    module Archives

      # Mixin for ZIP archives.  It includes IMW::Resources::Archive
      # and supplies the +zip+ / +unzip+ program names and
      # per-operation flags through +archive_settings+.
      module Zip

        include IMW::Resources::Archive

        # Programs and flags used for each archive operation.
        # <tt>:unarchiving_program</tt> names the separate program used
        # for unarchiving.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program             => :zip,
            :create              => "-qqr",
            :append              => "-qqg",
            :list                => "-l",
            :extract             => "-qqo",
            :unarchiving_program => :unzip
          }
        end

        protected

        # The `unzip' program outputs data in a very annoying format:
        #
        #     Archive:  data.zip
        #       Length     Date   Time    Name
        #      --------    ----   ----    ----
        #         18510  07-28-08 15:58   data/4d7Qrgz7.csv
        #          3418  07-28-08 15:41   data/7S.csv
        #         23353  07-28-08 15:41   data/g.csv
        #           711  07-28-08 15:58   data/g.xml
        #          1095  07-28-08 15:41   data/L.xml
        #          2399  07-28-08 15:58   data/mTAu9H3.xml
        #           152  07-28-08 15:58   data/vaHBS2t5R.dat
        #      --------                   -------
        #         49638                   7 files
        #
        # which is parsed by this method.
        #
        # @param [String] string the raw listing output
        # @return [Array<String>] the file names in the listing; empty
        #   when the output is too short to contain any file rows
        def archive_contents_string_to_array string
          rows = string.split("\n")
          # Ignore the first 3 lines of the output and also discard the
          # last 2 (5 = 2 + 3).  Array#[] returns nil when the requested
          # length is negative or the start is out of range, i.e. when
          # the listing is shorter than header + footer.
          file_rows = rows[3, rows.length - 5] || []
          file_rows.map do |row|
            columns = row.strip.split(' ')
            # A row without a fourth column carries no file name.
            next if columns.length < 4
            # The file name starts in the fourth column; re-join the
            # rest so names containing spaces survive.
            columns[3..-1].join(' ')
          end.compact
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
module IMW
  module Resources
    autoload :Compressible,    'imw/resources/compressible'
    autoload :CompressedFile,  'imw/resources/compressed_file'
    autoload :Archive,         'imw/resources/archive'
    autoload :Archives,        'imw/resources/archive'
    autoload :CompressedFiles, 'imw/resources/compressed_file'

    # Table of handlers adding archiving, extracting, compressing and
    # decompressing methods to a resource.  Each entry pairs the name
    # of a module (relative to IMW::Resources) with a Proc that is
    # given the resource and answers whether the module applies.
    ARCHIVE_AND_COMPRESSED_HANDLERS = [

      # Compressible goes first so the compressed-file handlers further
      # down can override it.
      ["Compressible", Proc.new { |resource| resource.is_local? }],

      # Order matters: compound extensions (tar.bz2, &c.) are listed
      # ahead of the bare ones (.bz2, &c.).
      ["Archives::Tarbz2", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.tar\.bz2$/
      }],
      ["Archives::Tarbz2", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.tbz2$/
      }],
      ["CompressedFiles::Bz2", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.bz2$/ && resource.path !~ /\.tar\.bz2$/ && resource.path !~ /\.tbz2$/
      }],
      ["Archives::Targz", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.tar\.gz$/
      }],
      ["Archives::Targz", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.tgz$/
      }],
      ["CompressedFiles::Gz", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.gz$/ && resource.path !~ /\.tar\.gz$/ && resource.path !~ /\.tgz$/
      }],
      ["Archives::Tar", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.tar$/
      }],
      ["Archives::Rar", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.rar$/
      }],
      ["Archives::Zip", Proc.new { |resource|
        resource.is_local? && resource.path =~ /\.zip$/
      }]

    ]

  end
end
@@ -0,0 +1,89 @@
module IMW
  module Resources

    module CompressedFiles
      autoload :Bz2, 'imw/resources/archives_and_compressed/bz2'
      autoload :Gz,  'imw/resources/archives_and_compressed/gz'
    end

    # Methods for decompressing a compressed file.  A resource is not
    # extended with this module directly; format-specific modules
    # (e.g. IMW::Resources::CompressedFiles::Bz2) include it and
    # provide the +compression_settings+ (program name, flags, &c.)
    # that the methods here look up.
    module CompressedFile

      attr_accessor :compression_settings

      # Is this file compressed?
      #
      # @return [true]
      def is_compressed?
        true
      end

      # Can this file be compressed?
      #
      # @return [false]
      def is_compressible?
        false
      end

      # The basename this resource will have once decompressed, i.e.
      # its basename with the extension removed.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_basename
      #   => 'my_file.txt'
      #
      # @return [String] the decompressed basename
      def decompressed_basename
        chars_to_drop = extname.size
        basename[0..-(chars_to_drop + 1)]
      end

      # The path this resource will have once decompressed.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_path
      #   => '/path/to/my_file.txt'
      #
      # @return [String] the decompressed path
      def decompressed_path
        File.join(dirname, decompressed_basename)
      end

      # Decompress this file inside its own directory, overwriting
      # any existing files and without keeping the original compressed
      # file.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress!
        should_exist!("Cannot decompress.")
        settings = compression_settings
        # Fall back to the compression program when no dedicated
        # decompression program is configured.
        program = settings[:decompression_program] || settings[:program]
        FileUtils.cd(dirname) do
          IMW.system(program, settings[:decompress], path)
        end
        IMW.open(decompressed_path)
      end

      # Decompress this file inside its own directory, overwriting
      # any existing files but keeping the original compressed file.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress
        should_exist!("Cannot decompress.")
        backup = nil
        begin
          backup       = cp(path + '.imw_copy')
          decompressed = decompress!
          backup.mv(path)
          decompressed
        ensure
          # Put the original back if decompression failed part-way.
          backup.mv(path) if backup && backup.exist?
        end
      end

    end
  end
end
@@ -0,0 +1,77 @@
module IMW

  # Default settings used when compressing files.  <tt>:program</tt>
  # defines the name of the command-line program to use,
  # <tt>:compress</tt> gives the flags to use when compressing, and
  # <tt>:extension</tt> gives the extension (_without_ the `.') added
  # by the program after compressing.
  COMPRESSION_SETTINGS = {
    :program   => 'bzip2',
    :compress  => '',
    :extension => 'bz2'
  } unless defined?(COMPRESSION_SETTINGS)

  module Resources

    # Defines methods for compressing a file.  The default compression
    # program is defined in IMW::COMPRESSION_SETTINGS though a
    # particular resource can change the values in its
    # +compression_settings+ hash.
    module Compressible

      # Replace this resource's compression settings wholesale.
      attr_writer :compression_settings

      # Is this file compressible?
      #
      # @return [true]
      def is_compressible?
        true
      end

      # The compression settings used for this resource.
      # <tt>:program</tt> defines the name of the command-line program
      # to use, <tt>:compress</tt> gives the flags to use when
      # compressing, and <tt>:extension</tt> gives the extension
      # (_without_ the `.') added by the program after compressing.
      #
      # Each resource gets its own (shallow) copy of
      # IMW::COMPRESSION_SETTINGS, so changing a key here does not
      # alter the defaults or any other resource.
      #
      # @return [Hash]
      def compression_settings
        @compression_settings ||= COMPRESSION_SETTINGS.dup
      end

      # Compress this resource in place, overwriting it.
      #
      # This resource's +compression_settings+ method is used to
      # determine the method of compression.
      #
      # @return [IMW::Resource] the compressed file
      def compress!
        should_exist!("Cannot compress.")
        settings = compression_settings
        IMW.system(settings[:program], settings[:compress], path)
        IMW.open(File.join(dirname, "#{basename}.#{settings[:extension]}"))
      end

      # Compress this resource without overwriting it.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @param [Hash] options currently unused
      # @return [IMW::Resource] the compressed file
      def compress options={}
        should_exist!("Cannot compress.")
        begin
          copy            = cp(path + '.imw_copy')
          compressed_file = compress!
          copy.mv(path)
          compressed_file
        ensure
          # +copy+ is still nil when +cp+ itself raised; guard so the
          # original error is not masked by a NoMethodError here.
          copy.mv(path) if copy && copy.exist?
        end
      end

    end
  end
end
@@ -0,0 +1,92 @@
module IMW
  module Resources
    module Formats

      # Defines methods used for parsing and writing delimited data
      # formats (CSV, TSV, &c.) with the FasterCSV library.  This
      # module is not used to directly extend a resource.  Instead,
      # more specific modules (e.g. - IMW::Resources::Formats::Csv)
      # include this one and also define +delimited_options+ which is
      # actually what's passed to FasterCSV.
      #
      # @abstract
      module Delimited

        # Kept for backward compatibility; nothing in this module reads
        # it.  Use +delimited_options=+ to change what FasterCSV sees.
        attr_accessor :delimited_settings

        # Replace the options hash returned by +delimited_options+
        # (and therefore passed to FasterCSV) for this resource.
        attr_writer :delimited_options

        # Return the data in this delimited resource as an array of
        # arrays.
        #
        # Yield each outer array (row) if passed a block.
        #
        # @return [Array] the full data matrix (when no block is given)
        # @yield [Array] each row of the data
        def load &block
          require 'fastercsv'
          FasterCSV.parse(read, delimited_options, &block)
        end

        # Map each row in this delimited resource.
        #
        # @yield [Array] each row of the data
        def map &block
          load.map(&block)
        end

        # Dump an array of arrays into this resource.
        #
        # @param [Array] data array of arrays to dump
        # @param [Hash] options
        # @option options [true, false] :persist Keep this resource's IO object open after dumping
        # @return [self]
        def dump data, options={}
          require 'fastercsv'
          data.each do |row|
            write(FasterCSV.generate_line(row, delimited_options))
          end
          io.close unless options[:persist]
          self
        end
      end

      # Comma-separated values.
      module Csv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => ',',
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end

      # Tab-separated values.
      module Tsv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => "\t",
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
module IMW
  module Resources
    module Formats

      # Defines methods for reading and writing Microsoft Excel data.
      #
      # NOTE(review): the +Spreadsheet+ constant is used throughout but
      # never required in this file -- presumably it is loaded
      # elsewhere; confirm.
      module Excel

        # Readers are defined below so they can build their value on
        # first use.
        attr_writer :book, :sheet

        # Hook run when a resource is extended with this module.
        # Because +initialize+ is not run for an object that already
        # exists, the row/sheet counters are set up here, and the
        # workbook and its first sheet are opened when the resource
        # already exists.
        #
        # @param [Object] obj the resource being extended
        def self.extended obj
          obj.instance_eval do
            reset_excel_state
            get_existing_book if exist?
          end
        end

        # The workbook behind this resource: opened from +path+ when
        # the resource exists, otherwise a new, empty workbook.
        #
        # @return [Object] the Spreadsheet workbook
        def book
          @book ||= exist? ? Spreadsheet.open(path) : Spreadsheet::Workbook.new
        end

        # The current worksheet.  One is created in +book+ the first
        # time this is called if none has been set; later calls return
        # the same sheet.
        #
        # @return [Object] the current worksheet
        def sheet
          @sheet ||= book.create_worksheet
        end

        # If an Excel file exists at the location specified by uri then
        # it is opened and can be read out with a subsequent call to
        # load().  Otherwise, a new workbook is created and can be
        # written to with the dump() method.
        #
        # @param [String] uri location of the workbook
        # @param [String] mode currently unused
        # @param [Hash] options
        # @option options [Integer] :max_lines (65000) rows allowed per sheet
        def initialize uri, mode='r', options={}
          self.uri = uri
          reset_excel_state(options)
          if exist?
            get_existing_book
          else
            make_new_book
            make_new_sheet
          end
        end

        # Returns the data in an existing workbook as an array of
        # arrays.  Only capable of reading a single sheet.
        #
        # @return [Array<Array>]
        def load
          sheet.map { |row| row.to_a }
        end

        # Dumps data, which is assumed to be an array of arrays, to a
        # newly created Excel workbook.  Attempting to dump to a book
        # that already exists will typically result in file corruption.
        # Raises a 'too many lines' error if the number of lines of
        # data exceeds max_lines.
        #
        # @param [Array<Array>] data rows to write
        # @raise [RuntimeError] when the sheet already holds max_lines rows
        def dump data
          data.each do |line|
            raise "too many lines" if too_many?
            self << line
          end
          save unless no_data?
        end

        # Processes a single line of data and updates internal
        # variables.  You shouldn't need to call this directly.
        #
        # @param [Array] line one row of cell values
        def << line
          sheet.row(@sht_row).concat(line)
          @sht_row += 1
          @idx     += 1
        end

        # Instantiates a new Excel workbook in memory.  You shouldn't
        # need to call this directly.
        def make_new_book
          @book = Spreadsheet::Workbook.new
          @book_idx += 1
        end

        # Makes a new worksheet for a pre-existing Excel workbook.
        # This should be called after recovering from the
        # 'too many lines' error.
        def make_new_sheet
          @sheet = book.create_worksheet
          @sht_idx += 1
          @sht_row = 0 # always start at row 0 in a new sheet
        end

        # Opens an existing Excel workbook and its first worksheet.
        # You shouldn't need to call this directly.
        def get_existing_book
          @book  ||= Spreadsheet.open(path)
          @sheet ||= @book.worksheet(0)
          # would like to be able to dump new data, doesn't work
          @sht_row = @sheet ? @sheet.row_count : 0
          @sht_idx += 1
        end

        # Increments the current sheet to the next one in an open
        # book.  Not necessary at the moment.
        def incr_sheet
          @sheet = book.worksheet @sht_idx
        end

        # There are too many lines if the number of rows attempting to
        # be written exceeds max_lines.
        #
        # @return [true, false]
        def too_many?
          @sht_row >= @max_lines
        end

        # There is no data if the number of rows attempting to be
        # written is zero.
        #
        # @return [true, false]
        def no_data?
          @sht_row == 0
        end

        # Saves the workbook to +path+.
        def save
          book.write path
        end

        private

        # Puts the row/sheet/book counters into their starting state.
        #
        # @param [Hash] options
        # @option options [Integer] :max_lines (65000) rows allowed per sheet
        def reset_excel_state options={}
          @max_lines = options[:max_lines] || 65000
          @idx       = 0
          @book_idx  = 0
          @sht_idx   = 0
          @sht_row   = 0
        end
      end
    end
  end
end