imw 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.2
1
+ 0.2.3
@@ -121,6 +121,7 @@ module IMW
121
121
  # Remove the +tmp_dir+ entirely, getting rid of all temporary
122
122
  # files.
123
123
  def clean!
124
+ IMW.announce_if_verbose("Cleaning temporary directory #{tmp_dir}...")
124
125
  FileUtils.rm_rf(tmp_dir)
125
126
  end
126
127
 
@@ -0,0 +1,169 @@
1
module IMW
  module Tools

    # A class for producing summary data about a collection of
    # resources.
    #
    # This summary data includes file counts, directory counts, total
    # size and per-extension breakdowns (by number of files and by
    # amount of data).
    #
    # Every statistic is computed lazily and cached in an instance
    # variable; assigning new inputs clears the cache.
    class Summarizer

      # Extensions that are reported as the 'archive' data format.
      ARCHIVE_EXTENSIONS = ['tar', 'tar.bz2', 'tar.gz', 'tgz', 'tbz2', 'zip', 'rar'].freeze

      # Names of the instance variables used to cache statistics.
      CACHED_STATISTICS = [:num_files,
                           :num_directories,
                           :total_size,
                           :extension_counts,
                           :most_common_extension_by_count,
                           :normalized_extension_counts,
                           :extension_sizes,
                           :most_common_extension_by_size,
                           :normalized_extension_sizes].freeze

      # The inputs to this Summarizer.
      attr_reader :inputs

      # Initialize a new Summarizer with the given +inputs+.
      #
      # @param [Array<String, IMW::Resource>] inputs
      # @return [IMW::Tools::Summarizer]
      def initialize(*inputs)
        self.inputs = inputs.flatten
      end

      # Set new inputs for this summarizer.
      #
      # Each input is opened with IMW.open.  A directory is replaced
      # by its +resources+; any other input is kept as is.  Clears any
      # cached summary statistics.
      #
      # @param [Array<String, IMW::Resource>] new_inputs
      # @raise [PathError] if an input is local and does not exist
      def inputs=(new_inputs)
        @inputs = new_inputs.map do |input|
          resource = IMW.open(input)
          # Existence is only checked for local resources, not remote ones.
          raise PathError.new("Invalid input, #{resource.path}") if resource.is_local? && !resource.exist?
          resource.is_directory? ? resource.resources : resource
        end.compact.flatten
        clear_cached_statistics!
      end

      # Reset all the cached statistics of this summarizer to +nil+.
      def clear_cached_statistics!
        CACHED_STATISTICS.each do |statistic|
          instance_variable_set("@#{statistic}", nil)
        end
      end

      # Return the number of inputs (every entry of +inputs+ is
      # counted, including any that are directories).
      #
      # @return [Integer]
      def num_files
        @num_files ||= inputs.size
      end

      # Return the number of inputs that are directories.
      #
      # @return [Integer]
      def num_directories
        @num_directories ||= inputs.select { |input| input.is_directory? }.size
      end

      # Return the total size, summed over every input.
      #
      # @return [Integer]
      def total_size
        @total_size ||= inputs.inject(0) { |sum, input| sum + input.size }
      end

      # Return the file counts of each extension.  Directories are
      # skipped.
      #
      # @return [Hash]
      def extension_counts
        @extension_counts ||= tally_by_extension { |_input| 1 }
      end

      # Return the most common extension by count of files.
      #
      # Returns 'flat' when the winning extension is blank or when
      # there are no files at all.
      #
      # @return [String]
      def most_common_extension_by_count
        @most_common_extension_by_count ||= dominant_extension(extension_counts)
      end

      # Return the file counts of each extension, normalized by the
      # total number of files.
      #
      # @return [Hash]
      def normalized_extension_counts
        @normalized_extension_counts ||= normalize(extension_counts, num_files)
      end

      # Return the amount of data corresponding to each extension.
      # Directories are skipped.
      #
      # @return [Hash]
      def extension_sizes
        @extension_sizes ||= tally_by_extension { |input| input.size }
      end

      # Return the most common extension by amount of data.
      #
      # Returns 'flat' when the winning extension is blank or when no
      # extension accounts for more than zero bytes.
      #
      # @return [String]
      def most_common_extension_by_size
        @most_common_extension_by_size ||= dominant_extension(extension_sizes)
      end

      # Return the fractional share of each extension by file size.
      #
      # Every share is 0.0 when the total size is zero.
      #
      # @return [Hash]
      def normalized_extension_sizes
        @normalized_extension_sizes ||= normalize(extension_sizes, total_size)
      end

      # Return a guess as to the most common extension format for this
      # Summarizer's inputs.
      #
      # @return [String]
      def most_common_extension
        by_size  = most_common_extension_by_size
        by_count = most_common_extension_by_count
        return by_size if by_size == by_count # no contest

        # The winners hold the largest shares by construction.  Reading the
        # maxima avoids looking the winners up by name, which fails once a
        # blank extension has been reported as 'flat'.
        count_fraction = largest_share(normalized_extension_counts)
        size_fraction  = largest_share(normalized_extension_sizes)

        # Choose the winner based on which measure has a clear majority.
        return by_count if count_fraction > 0.5 && size_fraction < 0.5
        return by_size  if count_fraction < 0.5 && size_fraction > 0.5
        by_size # default to size
      end

      # Returns a guess as to the most common data format for this
      # Summarizer's inputs.
      #
      # Any archive extension is reported as 'archive'; otherwise the
      # most common extension is returned unchanged.
      #
      # @return [String]
      def most_common_data_format
        extension = most_common_extension
        ARCHIVE_EXTENSIONS.include?(extension) ? 'archive' : extension
      end

      private

      # Sum the block's value per extension over all non-directory inputs.
      def tally_by_extension
        tallies = {}
        inputs.each do |input|
          next if input.is_directory?
          extension = input.extension
          tallies[extension] = (tallies[extension] || 0) + yield(input)
        end
        tallies
      end

      # Return the extension with the strictly largest positive value in
      # +tallies+, or 'flat' if that extension is blank or none qualifies.
      def dominant_extension(tallies)
        best_extension, best_value = nil, 0
        tallies.each_pair do |extension, value|
          next unless value > best_value
          best_extension, best_value = extension, value
        end
        best_extension.to_s.strip.empty? ? 'flat' : best_extension
      end

      # Divide every value of +tallies+ by +total+, yielding 0.0 shares
      # instead of NaN when +total+ is zero.
      def normalize(tallies, total)
        denominator = total.to_f
        shares = {}
        tallies.each_pair do |extension, value|
          shares[extension] = denominator.zero? ? 0.0 : value.to_f / denominator
        end
        shares
      end

      # Return the largest value in +shares+, or 0.0 if it is empty.
      def largest_share(shares)
        shares.values.max || 0.0
      end

    end
  end
end
data/lib/imw/tools.rb CHANGED
@@ -2,6 +2,7 @@ module IMW
2
2
  module Tools
3
3
  autoload :Archiver, 'imw/tools/archiver'
4
4
  autoload :Transferer, 'imw/tools/transferer'
5
+ autoload :Summarizer, 'imw/tools/summarizer'
5
6
  end
6
7
  end
7
8
 
@@ -58,8 +58,9 @@ module IMW
58
58
  def self.system *commands
59
59
  stripped_commands = commands.flatten.map { |command| command.to_s unless command.blank? }.compact
60
60
  IMW.announce_if_verbose(stripped_commands.join(" "))
61
- Kernel.system(*stripped_commands)
61
+ exit_code = Kernel.system(*stripped_commands)
62
62
  raise IMW::SystemCallError.new($?.dup, commands.join(' ')) unless $?.success?
63
+ exit_code
63
64
  end
64
65
  end
65
66
 
@@ -0,0 +1,6 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Tools::Summarizer do
4
+ end
5
+
6
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dhruv Bansal
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2010-05-17 00:00:00 -05:00
13
+ date: 2010-05-20 00:00:00 -05:00
14
14
  default_executable: imw
15
15
  dependencies: []
16
16
 
@@ -72,6 +72,7 @@ files:
72
72
  - lib/imw/schemes/s3.rb
73
73
  - lib/imw/tools.rb
74
74
  - lib/imw/tools/archiver.rb
75
+ - lib/imw/tools/summarizer.rb
75
76
  - lib/imw/tools/transferer.rb
76
77
  - lib/imw/utils.rb
77
78
  - lib/imw/utils/error.rb
@@ -120,6 +121,7 @@ files:
120
121
  - spec/imw/schemes/remote_spec.rb
121
122
  - spec/imw/schemes/s3_spec.rb
122
123
  - spec/imw/tools/archiver_spec.rb
124
+ - spec/imw/tools/summarizer_spec.rb
123
125
  - spec/imw/tools/transferer_spec.rb
124
126
  - spec/imw/utils/paths_spec.rb
125
127
  - spec/imw/utils/shared_paths_spec.rb
@@ -167,6 +169,7 @@ test_files:
167
169
  - spec/imw/archives/tarbz2_spec.rb
168
170
  - spec/imw/archives/rar_spec.rb
169
171
  - spec/imw/tools/archiver_spec.rb
172
+ - spec/imw/tools/summarizer_spec.rb
170
173
  - spec/imw/tools/transferer_spec.rb
171
174
  - spec/imw/compressed_files/compressible_spec.rb
172
175
  - spec/imw/compressed_files/bz2_spec.rb