imw 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -0,0 +1,18 @@
1
module IMW
  module Resources
    module CompressedFiles

      # Resource mixin for gzip-compressed files.
      #
      # Brings in the generic IMW::Resources::CompressedFile behaviour
      # and supplies the gzip-specific decompression program and flags.
      module Gz

        include IMW::Resources::CompressedFile

        # Settings describing how to decompress this resource.
        #
        # The hash is built on first access and cached in
        # <tt>@compression_settings</tt>; <tt>:decompression_program</tt>
        # names the command-line program and <tt>:decompress</tt> the
        # flags handed to it.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings

          @compression_settings = {
            :decompression_program => :gunzip,
            :decompress            => '-fd'
          }
        end

      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'imw/resources/archive'
2
+
3
module IMW
  module Resources
    module Archives

      # Resource mixin for RAR archives.
      #
      # Includes the generic IMW::Resources::Archive behaviour and
      # supplies the +rar+ program together with the arguments used
      # for each archive operation.
      module Rar

        include IMW::Resources::Archive

        # Program and per-operation arguments for working with a RAR
        # archive.  Built on first access and cached in
        # <tt>@archive_settings</tt>.
        #
        # @return [Hash] with keys <tt>:program</tt>, <tt>:create</tt>,
        #   <tt>:append</tt>, <tt>:list</tt> and <tt>:extract</tt>
        def archive_settings
          return @archive_settings if @archive_settings

          @archive_settings = {
            :program => :rar,
            :create  => %w[a -o+ -inul],
            :append  => %w[a -o+ -inul],
            :list    => "vb",
            :extract => %w[x -o+ -inul]
          }
        end
      end
    end
  end
end
23
+
@@ -0,0 +1,23 @@
1
+ require 'imw/resources/archive'
2
+
3
module IMW
  module Resources
    module Archives

      # Resource mixin for plain (uncompressed) tar archives.
      #
      # Includes the generic IMW::Resources::Archive behaviour and
      # supplies the +tar+ program together with the flags used for
      # each archive operation.
      module Tar

        include IMW::Resources::Archive

        # Program and per-operation flags for working with a tar
        # archive.  Built on first access and cached in
        # <tt>@archive_settings</tt>.
        #
        # @return [Hash] with keys <tt>:create</tt>, <tt>:append</tt>,
        #   <tt>:list</tt>, <tt>:extract</tt> and <tt>:program</tt>
        def archive_settings
          return @archive_settings if @archive_settings

          @archive_settings = {
            :create  => "-cf",
            :append  => "-rf",
            :list    => "-tf",
            :extract => "-xf",
            :program => :tar
          }
        end
      end
    end
  end
end
23
+
@@ -0,0 +1,78 @@
1
+ require 'imw/resources/archive'
2
+ require 'imw/resources/compressed_file'
3
+
4
module IMW
  module Resources
    module Archives

      # Resource mixin for bzip2-compressed tar archives
      # (<tt>.tar.bz2</tt> / <tt>.tbz2</tt>).
      #
      # Such a resource is both a compressed file and an archive, so
      # this module includes both IMW::Resources::CompressedFile and
      # IMW::Resources::Archive and supplies settings for each.
      module Tarbz2

        #
        # It's a compressed file
        #

        include IMW::Resources::CompressedFile

        # Settings describing how this resource is (de)compressed.
        # Built on first access and cached.
        #
        # @return [Hash]
        def compression_settings
          @compression_settings ||= {
            :program               => :bzip2,
            :decompression_program => :bunzip2,
            :decompress            => '',
            :extension             => 'bz2'
          }
        end

        #
        # But it's also an archive
        #

        include IMW::Resources::Archive

        # Program and per-operation flags for the archive side of this
        # resource.  Built on first access and cached.
        #
        # @return [Hash]
        def archive_settings
          @archive_settings ||= {
            :program => :tar,
            :create  => '-cf',
            :list    => "-tjf",
            :extract => "-xjf"
          }
        end

        # Create this archive from +input_paths+: first build a plain
        # tar file next to it, then compress that tar file.
        #
        # Presumably overrides the +create+ supplied by the included
        # IMW::Resources::Archive so that the result is compressed
        # after it is created.
        #
        # @param [Array<String>] input_paths paths to add (nested arrays are flattened)
        # @return whatever <tt>compress!</tt> on the intermediate tar resource returns
        def create *input_paths
          IMW.system(archive_settings[:program], archive_settings[:create], path_between_archive_and_compression, *input_paths.flatten)
          tar = IMW.open(path_between_archive_and_compression)
          # Hand over this resource's bzip2 settings explicitly (as
          # Targz#create does) instead of relying on the intermediate
          # resource's own default compression settings.
          tar.compression_settings = compression_settings
          tar.compress!
        end

        # The basename of this resource once the bzip2 layer has been
        # removed, i.e. the name of the underlying tar file.
        #
        # @return [String]
        def decompressed_basename
          case extname
          when '.tar.bz2' then basename[0..-5]                 # .tar.bz2 => .tar
          when '.tbz2'    then basename.gsub(/tbz2$/, 'tar')   # .tbz2    => .tar
          else                 basename[0..-(extname.size + 1)]
          end
        end


        protected

        # Path of the intermediate, uncompressed tar file written by
        # +create+ before it is compressed.
        #
        # @return [String]
        def path_between_archive_and_compression
          File.join(dirname,name + '.tar')
        end

        public

        #
        # It's a compressed file AND an archive!
        #

        # The extension of this resource, treating <tt>.tar.bz2</tt>
        # as a single extension.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.bz2$/ then '.tar.bz2'
          when /\.tbz2$/     then '.tbz2'
          else                    File.extname(path)
          end
        end

      end
    end
  end
end
78
+
@@ -0,0 +1,78 @@
1
+ require 'imw/resources/archive'
2
+ require 'imw/resources/compressed_file'
3
+
4
module IMW
  module Resources
    module Archives

      # Resource mixin for gzip-compressed tar archives
      # (<tt>.tar.gz</tt> / <tt>.tgz</tt>).
      #
      # Such a resource is both a compressed file and an archive, so
      # both IMW::Resources::CompressedFile and
      # IMW::Resources::Archive are included, each with its own
      # settings hash.
      module Targz

        #
        # Compressed-file side
        #

        include IMW::Resources::CompressedFile

        # Settings describing how this resource is (de)compressed.
        # Built on first access and cached.
        #
        # @return [Hash]
        def compression_settings
          return @compression_settings if @compression_settings

          @compression_settings = {
            :program               => :gzip,
            :decompression_program => :gunzip,
            :decompress            => '',
            :extension             => 'gz'
          }
        end

        #
        # Archive side
        #

        include IMW::Resources::Archive

        # Program and per-operation flags for the archive side of this
        # resource.  Built on first access and cached.
        #
        # @return [Hash]
        def archive_settings
          return @archive_settings if @archive_settings

          @archive_settings = {
            :program => :tar,
            :list    => "-tzf",
            :create  => '-cf',
            :extract => "-xzf"
          }
        end

        # Create this archive from +input_paths+: a plain tar file is
        # written first and then compressed using this resource's
        # compression settings.
        #
        # @param [Array<String>] input_paths paths to add (nested arrays are flattened)
        # @return whatever <tt>compress!</tt> on the intermediate tar resource returns
        def create *input_paths
          tar_path     = path_between_archive_and_compression
          create_flags = archive_settings[:create].split
          IMW.system(archive_settings[:program], create_flags, tar_path, *input_paths.flatten)

          intermediate = IMW.open(tar_path)
          intermediate.compression_settings = compression_settings
          intermediate.compress!
        end

        # The basename of this resource once the gzip layer has been
        # removed, i.e. the name of the underlying tar file.
        #
        # @return [String]
        def decompressed_basename
          ext = extname
          if ext == '.tar.gz'
            basename[0...-3]                 # drop the trailing ".gz"
          elsif ext == '.tgz'
            basename.gsub(/tgz$/, 'tar')     # foo.tgz => foo.tar
          else
            basename[0..-(ext.size + 1)]
          end
        end

        protected

        # Path of the intermediate, uncompressed tar file written by
        # +create+ before it is compressed.
        #
        # @return [String]
        def path_between_archive_and_compression
          tar_name = name + '.tar'
          File.join(dirname, tar_name)
        end

        public

        #
        # Both at once: archive and compressed file
        #

        # The extension of this resource, treating <tt>.tar.gz</tt> as
        # a single extension.
        #
        # @return [String]
        def extname
          case path
          when /\.tar\.gz$/
            '.tar.gz'
          when /\.tgz$/
            '.tgz'
          else
            File.extname(path)
          end
        end

      end
    end
  end
end
78
+
@@ -0,0 +1,57 @@
1
+ require 'imw/resources/archive'
2
+
3
module IMW
  module Resources
    module Archives

      # Resource mixin for ZIP archives.
      #
      # Includes the generic IMW::Resources::Archive behaviour and
      # supplies the programs and flags used for each operation, plus a
      # parser for the listing printed by +unzip+.
      module Zip

        include IMW::Resources::Archive

        # Programs and per-operation flags for working with a ZIP
        # archive.  Built on first access and cached.
        #
        # @return [Hash] with keys <tt>:program</tt>, <tt>:create</tt>,
        #   <tt>:append</tt>, <tt>:list</tt>, <tt>:extract</tt> and
        #   <tt>:unarchiving_program</tt>
        def archive_settings
          @archive_settings ||= {
            :program             => :zip,
            :create              => "-qqr",
            :append              => "-qqg",
            :list                => "-l",
            :extract             => "-qqo",
            :unarchiving_program => :unzip
          }
        end

        protected

        # The `unzip' program outputs data in a very annoying format:
        #
        #     Archive:  data.zip
        #       Length     Date   Time    Name
        #      --------    ----   ----    ----
        #         18510  07-28-08 15:58   data/4d7Qrgz7.csv
        #          3418  07-28-08 15:41   data/7S.csv
        #         23353  07-28-08 15:41   data/g.csv
        #           711  07-28-08 15:58   data/g.xml
        #          1095  07-28-08 15:41   data/L.xml
        #          2399  07-28-08 15:58   data/mTAu9H3.xml
        #           152  07-28-08 15:58   data/vaHBS2t5R.dat
        #      --------                   -------
        #         49638                   7 files
        #
        # which is parsed by this method.
        #
        # Output too short to contain any file rows, and rows with
        # fewer than four columns, produce no entries rather than
        # raising.
        #
        # @param [String] string the raw listing
        # @return [Array<String>] the file names, in listing order
        def archive_contents_string_to_array string
          rows = string.split("\n")
          # Ignore the first 3 lines of the output and also discard the
          # last 2.  A range is used (rather than start + length) so
          # that short output gives an empty list instead of nil.
          file_rows = rows[3...-2] || []
          file_rows.map do |row|
            # The file name is the fourth column.  Limiting the split
            # to four fields keeps any whitespace inside the name
            # exactly as printed.
            row.strip.split(' ', 4)[3]
          end.compact
        end
      end
    end
  end
end
56
+
57
+
@@ -0,0 +1,32 @@
1
module IMW
  module Resources
    autoload :Compressible,    'imw/resources/compressible'
    autoload :CompressedFile,  'imw/resources/compressed_file'
    autoload :Archive,         'imw/resources/archive'
    autoload :Archives,        'imw/resources/archive'
    autoload :CompressedFiles, 'imw/resources/compressed_file'

    # Table of handlers that add archiving, extracting, compressing
    # and decompressing methods to a resource.
    #
    # Every entry is a two-element array: the name of a module
    # (relative to IMW::Resources) and a Proc which, given a resource,
    # says whether that module applies to it.
    ARCHIVE_AND_COMPRESSED_HANDLERS = [

      # Compressible is tried first; the compressed-file handlers
      # further down are meant to override it.
      ["Compressible", proc { |r| r.is_local? }],

      # Order matters below: tar.bz2 has to precede .bz2, and so on.
      ["Archives::Tarbz2",     proc { |r| r.is_local? && r.path =~ /\.tar\.bz2$/ }],
      ["Archives::Tarbz2",     proc { |r| r.is_local? && r.path =~ /\.tbz2$/ }],
      ["CompressedFiles::Bz2", proc { |r| r.is_local? && r.path =~ /\.bz2$/ && !(r.path =~ /\.tar\.bz2$/ || r.path =~ /\.tbz2$/) }],
      ["Archives::Targz",      proc { |r| r.is_local? && r.path =~ /\.tar\.gz$/ }],
      ["Archives::Targz",      proc { |r| r.is_local? && r.path =~ /\.tgz$/ }],
      ["CompressedFiles::Gz",  proc { |r| r.is_local? && r.path =~ /\.gz$/ && !(r.path =~ /\.tar\.gz$/ || r.path =~ /\.tgz$/) }],
      ["Archives::Tar",        proc { |r| r.is_local? && r.path =~ /\.tar$/ }],
      ["Archives::Rar",        proc { |r| r.is_local? && r.path =~ /\.rar$/ }],
      ["Archives::Zip",        proc { |r| r.is_local? && r.path =~ /\.zip$/ }]

    ]

  end
end
32
+
@@ -0,0 +1,89 @@
1
module IMW
  module Resources

    # Namespace for the format-specific compressed-file modules, each
    # loaded on first reference.
    module CompressedFiles
      autoload :Bz2, 'imw/resources/archives_and_compressed/bz2'
      autoload :Gz,  'imw/resources/archives_and_compressed/gz'
    end

    # Methods for decompressing a compressed file.
    #
    # This module is not meant to extend an IMW::Resource directly;
    # format-specific modules (for example
    # IMW::Resources::CompressedFiles::Bz2) include it and define the
    # command-line program and flags that make it work.
    module CompressedFile

      attr_accessor :compression_settings

      # Is this file compressed?
      #
      # @return [true]
      def is_compressed?
        true
      end

      # Can this file be compressed?
      #
      # @return [false]
      def is_compressible?
        false
      end

      # The basename of this resource after decompression, i.e. its
      # basename with the extension removed.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_basename
      #   => 'my_file.txt'
      #
      # @return [String] the decompressed basename
      def decompressed_basename
        extension_length = extname.size
        basename[0..-(extension_length + 1)]
      end

      # The path of this resource after decompression.
      #
      #   IMW::Resource.new('/path/to/my_file.txt.bz2').decompressed_path
      #   => '/path/to/my_file.txt'
      #
      # @return [String] the decompressed path
      def decompressed_path
        directory = dirname
        File.join(directory, decompressed_basename)
      end

      # Decompress this file inside its own directory without keeping
      # a copy of the original compressed file.
      #
      # The program is <tt>:decompression_program</tt> from
      # +compression_settings+, falling back to <tt>:program</tt>.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress!
        should_exist!("Cannot decompress.")
        settings     = compression_settings
        decompressor = settings[:decompression_program] || settings[:program]
        FileUtils.cd(dirname) do
          IMW.system(decompressor, compression_settings[:decompress], path)
        end
        IMW.open(decompressed_path)
      end

      # Decompress this file inside its own directory while keeping
      # the original compressed file.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @return [IMW::Resource] the decompressed resource
      def decompress
        should_exist!("Cannot decompress.")
        backup = nil
        begin
          # Set a copy aside, decompress the original in place, then
          # move the copy back to the original path.
          backup       = cp(path + '.imw_copy')
          decompressed = decompress!
          backup.mv(path)
          decompressed
        ensure
          # If anything failed part-way, still put the copy back.
          backup.mv(path) if backup && backup.exist?
        end
      end

    end
  end
end
87
+
88
+
89
+
@@ -0,0 +1,77 @@
1
module IMW

  # Default settings used when compressing files.  <tt>:program</tt>
  # defines the name of the command-line program to use,
  # <tt>:compress</tt> gives the flags to use when compressing, and
  # <tt>:extension</tt> gives the extension (_without_ the `.') added
  # by the program after compressing.
  COMPRESSION_SETTINGS = {
    :program   => 'bzip2',
    :compress  => '',
    :extension => 'bz2'
  } unless defined?(COMPRESSION_SETTINGS)

  module Resources

    # Defines methods for compressing a file.  The default compression
    # program is defined in IMW::COMPRESSION_SETTINGS though a
    # particular resource can change the values in its
    # +compression_settings+ hash.
    module Compressible

      # Compression settings (the reader is defined below).
      attr_writer :compression_settings

      # Is this file compressible?
      #
      # @return [true]
      def is_compressible?
        true
      end

      # Defines the compression settings used for this
      # resource.  <tt>:program</tt> defines the name of the
      # command-line program to use, <tt>:compress</tt> gives the
      # flags to use when compressing, and <tt>:extension</tt> gives
      # the extension (_without_ the `.') added by the program after
      # compressing.
      #
      # Each resource gets its own copy of IMW::COMPRESSION_SETTINGS,
      # so changing a value here does not alter the defaults seen by
      # other resources.
      #
      # @return [Hash]
      def compression_settings
        @compression_settings ||= COMPRESSION_SETTINGS.dup
      end

      # Compress this resource in place, overwriting it.
      #
      # This resource's +compression_settings+ method is used to
      # determine the method of compression.
      #
      # @return [IMW::Resource] the compressed file
      def compress!
        should_exist!("Cannot compress.")
        IMW.system(*[compression_settings[:program], compression_settings[:compress], path])
        IMW.open(File.join(dirname,basename + "." + compression_settings[:extension]))
      end

      # Compress this resource without overwriting it.
      #
      # FIXME The implementation is a little stupid as the file is
      # needlessly copied.
      #
      # @param [Hash] options currently unused
      # @return [IMW::Resource] the compressed file
      def compress options={}
        should_exist!("Cannot compress.")
        begin
          copy = cp(path + '.imw_copy')
          compressed_file = compress!
          copy.mv(path)
          compressed_file
        ensure
          # +copy+ is nil when the initial +cp+ itself failed; guard so
          # the original error is not replaced by a NoMethodError.
          copy.mv(path) if copy && copy.exist?
        end
      end

    end
  end
end
77
+
@@ -0,0 +1,92 @@
1
module IMW
  module Resources
    module Formats

      # Defines methods used for parsing and writing delimited data
      # formats (CSV, TSV, &c.) with the FasterCSV library.  This
      # module is not used to directly extend a resource.  Instead,
      # more specific modules (e.g. - IMW::Resources::Formats::Csv)
      # include this one and also define +delimited_options+ which is
      # actually what's passed to FasterCSV.
      #
      # @abstract
      module Delimited

        # Kept for backward compatibility; nothing in this module
        # reads it.
        attr_accessor :delimited_settings

        # Allows the options handed to FasterCSV to be replaced
        # wholesale; the reader is defined by the including module
        # (Csv, Tsv) and returns whatever was assigned here.
        attr_writer :delimited_options

        # Return the data in this delimited resource as an array of
        # arrays.
        #
        # Yield each outer array (row) if passed a block.
        #
        # @return [Array] the full data matrix
        # @yield [Array] each row of the data
        def load &block
          require 'fastercsv'
          FasterCSV.parse(read, delimited_options, &block)
        end

        # Map each row in this delimited resource.
        #
        # @yield [Array] each row of the data
        def map &block
          load.map(&block)
        end

        # Dump an array of arrays into this resource.
        #
        # @param [Array] data array of arrays to dump
        # @param [Hash] options
        # @option options [true, false] :persist Keep this resource's IO object open after dumping
        # @return [self]
        def dump data, options={}
          require 'fastercsv'
          data.each do |row|
            write(FasterCSV.generate_line(row, delimited_options))
          end
          io.close unless options[:persist]
          self
        end
      end

      # Comma-separated flavour of Delimited.
      module Csv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => ',',
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end

      # Tab-separated flavour of Delimited.
      module Tsv
        include Delimited

        # Default options to be passed to
        # FasterCSV[http://fastercsv.rubyforge.org/]; see its
        # documentation for more information.
        #
        # @return [Hash]
        def delimited_options
          @delimited_options ||= {
            :col_sep        => "\t",
            :headers        => false,
            :return_headers => false,
            :write_headers  => true,
            :skip_blanks    => false,
            :force_quotes   => false
          }
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
1
module IMW
  module Resources
    module Formats

      # Defines methods for reading and writing Microsoft Excel data.
      #
      # The workbook and worksheet are reached through the +book+ and
      # +sheet+ readers, which load or create them on first use, so
      # the module works both when a resource is extended with it and
      # when +initialize+ below is run.
      module Excel

        # Rows allowed per sheet when no <tt>:max_lines</tt> option is given.
        DEFAULT_MAX_LINES = 65000

        # Writers only; the readers are defined below.
        attr_writer :book, :sheet

        # Hook run when an object is extended with this module: set up
        # the row/sheet counters on that object and, if the resource
        # already exists, open its workbook and first sheet.
        #
        # @param obj the object being extended; must respond to
        #   +exist?+ and +path+
        def self.extended obj
          obj.send(:reset_excel_counters)
          obj.get_existing_book if obj.exist?
        end

        # The workbook behind this resource: opened from +path+ when
        # the resource exists, otherwise a new, empty workbook.
        # Memoized.
        def book
          @book ||= exist? ? Spreadsheet.open(path) : Spreadsheet::Workbook.new
        end

        # The current worksheet: the book's first worksheet, or a
        # newly created one when the book has none.  Memoized, so
        # repeated reads do not keep adding sheets.
        def sheet
          @sheet ||= (book.worksheet(0) || book.create_worksheet)
        end

        #If an Excel file exists at the location specified by uri then
        #it is opened and can be read out with a subsequent call to
        #load().  Otherwise, a new workbook is created and can be written
        #to with the dump() method.
        #
        # NOTE(review): +mode+ is accepted but not used here.
        def initialize uri, mode='r', options={}
          self.uri = uri
          reset_excel_counters(options[:max_lines])
          if exist?
            get_existing_book
          else
            make_new_book
            make_new_sheet
          end
        end

        #Returns the data in an existing workbook as an
        #array of arrays.  Only capable of reading a single sheet.
        def load
          sheet.map { |row| row.to_a }
        end

        #Dumps data, which is assumed to be an array of arrays, to a
        #newly created Excel workbook.  Attempting to dump to a book
        #that already exists will typically result in file corruption.
        #Raises a 'too many lines' error if the number of lines
        #of data exceeds max_lines.
        def dump data
          data.each do |line|
            raise "too many lines" if too_many?
            self << line
          end
          save unless no_data?
        end

        #Processes a single line of data and updates internal variables.
        #You shouldn't need to call this directly.
        def << line
          sheet.row(@sht_row).concat( line )
          @sht_row += 1
          @idx += 1
        end

        #Instantiates a new Excel workbook in memory.  You shouldn't
        #need to call this directly.
        def make_new_book
          @book = Spreadsheet::Workbook.new
          @book_idx += 1
        end

        #Makes a new worksheet for a pre-existing Excel workbook.
        #This should be called after recovering from the
        #'too many lines' error.
        def make_new_sheet
          @sheet = book.create_worksheet
          @sht_idx += 1
          @sht_row = 0 #always start at row 0 in a new sheet
        end

        #Opens an existing Excel workbook.  You shouldn't need to
        #call this directly.
        def get_existing_book
          @sht_row = sheet.row_count #would like to be able to dump new data, doesn't work
          @sht_idx += 1
        end

        #Increments the current sheet to the next one in
        #an open book.  Not necessary at the moment.
        def incr_sheet
          @sheet = book.worksheet @sht_idx
        end

        #There are too many lines if the number of rows attempting
        #to be written exceeds max_lines.
        def too_many?
          @sht_row >= @max_lines
        end

        #There is no data if the number of rows attempting to be written
        #is zero.
        def no_data?
          @sht_row == 0
        end

        #Saves the workbook.
        def save
          book.write path
        end

        private

        # Put every counter back to its starting value.
        #
        # @param [Integer, nil] max_lines row limit per sheet;
        #   DEFAULT_MAX_LINES when nil
        def reset_excel_counters max_lines=nil
          @max_lines = max_lines || DEFAULT_MAX_LINES
          @idx       = 0
          @book_idx  = 0
          @sht_idx   = 0
          @sht_row   = 0
        end
      end
    end
  end
end