imw 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.2
1
+ 0.2.3
@@ -121,6 +121,7 @@ module IMW
121
121
  # Remove the +tmp_dir+ entirely, getting rid of all temporary
122
122
  # files.
123
123
  def clean!
124
+ IMW.announce_if_verbose("Cleaning temporary directory #{tmp_dir}...")
124
125
  FileUtils.rm_rf(tmp_dir)
125
126
  end
126
127
 
@@ -0,0 +1,169 @@
1
module IMW
  module Tools

    # A class for producing summary data about a collection of
    # resources.
    #
    # This summary data includes file counts, file sizes, and the
    # distribution of file extensions across the inputs.
    #
    # All statistics are memoized; assigning new inputs via +inputs=+
    # discards every cached value.
    class Summarizer

      # Extensions which are reported as the 'archive' data format.
      ARCHIVE_EXTENSIONS = %w[tar tar.bz2 tar.gz tgz tbz2 zip rar].freeze

      # Label reported when the winning extension is missing or blank,
      # or when there are no file inputs at all.
      FLAT_EXTENSION = 'flat'.freeze

      # The inputs to this Summarizer.
      attr_reader :inputs

      # Initialize a new Summarizer with the given +inputs+.
      #
      # @param [Array<String, IMW::Resource>] inputs
      # @return [IMW::Tools::Summarizer]
      def initialize *inputs
        self.inputs = inputs.flatten
      end

      # Set new inputs for this summarizer.
      #
      # Each input is opened with IMW.open.  A directory is replaced
      # by whatever its +resources+ method returns; anything else is
      # kept as is.  Clears any cached summary statistics.
      #
      # @param [Array<String, IMW::Resource>] new_inputs
      # @raise [PathError] if a local input does not exist
      def inputs= new_inputs
        @inputs = new_inputs.map do |input|
          resource = IMW.open(input)
          # don't check for remote files
          raise PathError, "Invalid input, #{resource.path}" if resource.is_local? && !resource.exist?
          resource.is_directory? ? resource.resources : resource
        end.compact.flatten
        clear_cached_statistics!
      end

      # Reset all the cached statistics of this summarizer to +nil+.
      def clear_cached_statistics!
        [:num_files,
         :num_directories,
         :total_size,
         :extension_counts,
         :most_common_extension_by_count,
         :normalized_extension_counts,
         :extension_sizes,
         :most_common_extension_by_size,
         :normalized_extension_sizes].each do |statistic|
          instance_variable_set("@#{statistic}", nil)
        end
      end

      # Return the number of inputs (every entry in +inputs+ is
      # counted, whatever it is).
      #
      # @return [Integer]
      def num_files
        @num_files ||= inputs.size
      end

      # Return the number of inputs which are directories.
      #
      # @return [Integer]
      def num_directories
        @num_directories ||= inputs.count { |input| input.is_directory? }
      end

      # Return the total size of all inputs.
      #
      # @return [Integer]
      def total_size
        @total_size ||= inputs.inject(0) { |sum, input| sum + input.size }
      end

      # Return the file counts of each extension.  Directories are
      # skipped.
      #
      # @return [Hash]
      def extension_counts
        @extension_counts ||= tally_by_extension { |_input| 1 }
      end

      # Return the most common extension by count of files.
      #
      # A missing or blank extension is reported as 'flat', as is the
      # case where there are no file inputs at all.
      #
      # @return [String]
      def most_common_extension_by_count
        @most_common_extension_by_count ||= display_extension(top_extension(extension_counts))
      end

      # Return the file counts of each extension, normalized by the
      # total number of files.
      #
      # @return [Hash]
      def normalized_extension_counts
        @normalized_extension_counts ||= extension_counts.inject({}) do |weighted, (extension, count)|
          weighted[extension] = count.to_f / num_files.to_f
          weighted
        end
      end

      # Return the amount of data corresponding to each extension.
      # Directories are skipped.
      #
      # @return [Hash]
      def extension_sizes
        @extension_sizes ||= tally_by_extension { |input| input.size }
      end

      # Return the most common extension by amount of data.
      #
      # A missing or blank extension is reported as 'flat', as is the
      # case where there are no file inputs at all.
      #
      # @return [String]
      def most_common_extension_by_size
        @most_common_extension_by_size ||= display_extension(top_extension(extension_sizes))
      end

      # Return the fractional share of each extension by file size.
      #
      # @return [Hash]
      def normalized_extension_sizes
        @normalized_extension_sizes ||= extension_sizes.inject({}) do |weighted, (extension, size)|
          weighted[extension] = size.to_f / total_size.to_f
          weighted
        end
      end

      # Return a guess as to the most common extension format for this
      # Summarizer's inputs.
      #
      # When the winners by size and by count disagree, the one holding
      # a majority share while the other does not is chosen; otherwise
      # the winner by size is returned.
      #
      # @return [String]
      def most_common_extension
        by_size  = most_common_extension_by_size
        by_count = most_common_extension_by_count
        return by_size if by_size == by_count # no contest

        # Look the shares up by the raw winning key: the display label
        # ('flat') is never a key of the normalized tables.
        count_fraction = normalized_extension_counts[top_extension(extension_counts)].to_f
        size_fraction  = normalized_extension_sizes[top_extension(extension_sizes)].to_f

        # choose the winner based on differential
        return by_count if count_fraction > 0.5 && size_fraction < 0.5
        return by_size  if count_fraction < 0.5 && size_fraction > 0.5
        by_size # default to size
      end

      # Returns a guess as to the most common data format for this
      # Summarizer's inputs.
      #
      # @return [String]
      def most_common_data_format
        extension = most_common_extension
        ARCHIVE_EXTENSIONS.include?(extension) ? 'archive' : extension
      end

      private

      # Sum the value yielded for each non-directory input, keyed by
      # that input's extension.
      def tally_by_extension
        inputs.inject({}) do |totals, input|
          unless input.is_directory?
            totals[input.extension] = (totals[input.extension] || 0) + yield(input)
          end
          totals
        end
      end

      # Return the key of +table+ holding the largest value, or +nil+
      # when +table+ is empty.
      def top_extension table
        winner = table.max_by { |_extension, value| value }
        winner && winner.first
      end

      # Map a raw extension to the label reported to callers.
      def display_extension extension
        extension.to_s.strip.empty? ? FLAT_EXTENSION : extension
      end

    end
  end
end
data/lib/imw/tools.rb CHANGED
@@ -2,6 +2,7 @@ module IMW
2
2
  module Tools
3
3
  autoload :Archiver, 'imw/tools/archiver'
4
4
  autoload :Transferer, 'imw/tools/transferer'
5
+ autoload :Summarizer, 'imw/tools/summarizer'
5
6
  end
6
7
  end
7
8
 
@@ -58,8 +58,9 @@ module IMW
58
58
  def self.system *commands
59
59
  stripped_commands = commands.flatten.map { |command| command.to_s unless command.blank? }.compact
60
60
  IMW.announce_if_verbose(stripped_commands.join(" "))
61
- Kernel.system(*stripped_commands)
61
+ exit_code = Kernel.system(*stripped_commands)
62
62
  raise IMW::SystemCallError.new($?.dup, commands.join(' ')) unless $?.success?
63
+ exit_code
63
64
  end
64
65
  end
65
66
 
@@ -0,0 +1,6 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper"
2
+
3
+ describe IMW::Tools::Summarizer do
4
+ end
5
+
6
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dhruv Bansal
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2010-05-17 00:00:00 -05:00
13
+ date: 2010-05-20 00:00:00 -05:00
14
14
  default_executable: imw
15
15
  dependencies: []
16
16
 
@@ -72,6 +72,7 @@ files:
72
72
  - lib/imw/schemes/s3.rb
73
73
  - lib/imw/tools.rb
74
74
  - lib/imw/tools/archiver.rb
75
+ - lib/imw/tools/summarizer.rb
75
76
  - lib/imw/tools/transferer.rb
76
77
  - lib/imw/utils.rb
77
78
  - lib/imw/utils/error.rb
@@ -120,6 +121,7 @@ files:
120
121
  - spec/imw/schemes/remote_spec.rb
121
122
  - spec/imw/schemes/s3_spec.rb
122
123
  - spec/imw/tools/archiver_spec.rb
124
+ - spec/imw/tools/summarizer_spec.rb
123
125
  - spec/imw/tools/transferer_spec.rb
124
126
  - spec/imw/utils/paths_spec.rb
125
127
  - spec/imw/utils/shared_paths_spec.rb
@@ -167,6 +169,7 @@ test_files:
167
169
  - spec/imw/archives/tarbz2_spec.rb
168
170
  - spec/imw/archives/rar_spec.rb
169
171
  - spec/imw/tools/archiver_spec.rb
172
+ - spec/imw/tools/summarizer_spec.rb
170
173
  - spec/imw/tools/transferer_spec.rb
171
174
  - spec/imw/compressed_files/compressible_spec.rb
172
175
  - spec/imw/compressed_files/bz2_spec.rb