readorder 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,147 @@
1
+ require 'readorder'
2
+
3
+ module Readorder
4
+ # The Command is the base class for any class that wants to implement a
5
+ # command line command for readorder.
6
+ #
7
+ # Inheriting from this class will make the class registered and be available
8
+ # for invocation from the Runner class
9
+ #
10
+ # The lifecycle of a command is:
11
+ #
12
+ # 1) instantiation with a hash parameter
13
+ # 2) before
14
+ # 3) run
15
+ # 4) after
16
+ # 5) error, called if the runner catches an exception from the command
17
+ #
18
+ class Command
19
+ class Error < ::Readorder::Error ; end
20
+
21
+ def self.command_name
22
+ name.split("::").last.downcase
23
+ end
24
+
25
+ attr_reader :options
26
+ attr_reader :filelist
27
+ attr_reader :analyzer
28
+ attr_reader :output
29
+
30
+ def initialize( opts = {} )
31
+ @options = opts
32
+ @filelist = nil
33
+ @analyzer = nil
34
+ @output = nil
35
+ end
36
+
37
# Lazily build the Filelist from the 'filelist' option and memoize it.
#
# Returns the Filelist instance.
# Raises Error with a user-oriented message if the Filelist cannot be
# constructed; the underlying failure is logged at debug level so that it
# is not lost.
def filelist
  return @filelist if @filelist
  begin
    @filelist = Filelist.new( @options['filelist'] )
  rescue => fe
    # keep the original cause visible for troubleshooting
    logger.debug "unable to create the file list: #{fe.class}: #{fe.message}"
    msg = "Invalid file list.  The list of files containing filenames should be given on the commandline, or filenames should be sent in on stdin."
    raise Error, msg
  end
  return @filelist
end
48
+
49
+ def analyzer
50
+ @analyzer ||= Analyzer.new( filelist, self.get_physical? )
51
+ end
52
+
53
# The IO that results are written to, created on first use and memoized.
#
# When the 'output' option is set the named file is opened with mode "w+"
# and that is logged; otherwise $stdout is used.
def output
  return @output if @output
  if options['output'] then
    logger.info "output going to #{options['output']}"
    @output = File.open( options['output'], "w+" )
  else
    @output = $stdout
  end
  @output
end
64
+
65
+
66
# Decide whether physical block numbers should be collected.
#
# Returns false when the 'inode' option is set, when Datum.is_linux? is
# false, or when the effective uid is not 0; the latter two cases log a
# warning.  Returns true otherwise.
def get_physical?
  return false if @options['inode']
  unless Datum.is_linux? then
    # RbConfig replaces the Config constant, which current Rubies no longer
    # define.
    logger.warn "unable to get physical block number, this is not a linux machine, it is #{::RbConfig::CONFIG['host_os']}"
    return false
  end
  unless Process.euid == 0 then
    logger.warn "no permissions to get physical block number, try running as root."
    return false
  end
  return true
end
78
+
79
+ def command_name
80
+ self.class.command_name
81
+ end
82
+
83
+ def logger
84
+ ::Logging::Logger[self]
85
+ end
86
+
87
+ # called by the Runner before the command, this can be used to setup
88
+ # additional items for the command
89
+ def before() ; end
90
+
91
+ # called by the Runner to execute the command
92
+ def run
93
+ raise Error, "Unknown command `#{command_name}`"
94
+ end
95
+
96
+ # called by the Runner if an error is encountered during the run method
97
+ def error() nil; end
98
+
99
+ # called by runner if a signal is hit
100
+ def shutdown() nil; end
101
+
102
+ # called by runner when all is done
103
+ def after()
104
+ if output != $stdout then
105
+ output.close
106
+ end
107
+ if options['error-filelist'] then
108
+ if analyzer.bad_data.size > 0 then
109
+ File.open( options['error-filelist'], "w+" ) do |f|
110
+ analyzer.dump_bad_data_to( f )
111
+ end
112
+ logger.info "wrote error filelist to #{options['error-filelist']}"
113
+ end
114
+ end
115
+ end
116
+
117
+ class << self
118
+ # this method is invoked by the Ruby interpreter whenever a class inherits
119
+ # from Command. This is how commands register to be invoked
120
+ #
121
def inherited( klass )
  # Only real Class objects are registered, and each at most once.
  # Returns nil when nothing is added, otherwise the commands list.
  if klass.instance_of?( Class ) && !commands.include?( klass ) then
    commands << klass
  end
end
126
+
127
+ # The list of commands registered.
128
+ #
129
def commands
  # created empty the first time it is asked for
  @commands = [] unless defined? @commands
  @commands
end
135
+
136
+ # get the command klass for the given name
137
def find( name )
  # Go through the commands accessor, not @commands, so that a lookup
  # before any command has been registered returns nil instead of failing
  # on an unset instance variable.
  commands.find { |klass| klass.command_name == name }
end
140
+
141
+ end
142
+ end
143
+ end
144
+
145
+ require 'readorder/commands/sort'
146
+ require 'readorder/commands/analyze'
147
+ require 'readorder/commands/test'
@@ -0,0 +1,17 @@
1
+ module Readorder
2
+ module Commands
3
+ #
4
+ # Analyze the list of files to sort and give a report
5
+ #
6
+ class Analyze < ::Readorder::Command
7
# Collect the data, write the analyzer's summary report to the output and,
# when the 'data-csv' option is given, dump the good data to that file and
# log how many rows were written.
def run
  analyzer.collect_data
  output.puts @analyzer.summary_report
  return nil unless options['data-csv']
  File.open( options['data-csv'], "w+" ) do |csv|
    analyzer.dump_good_data_to( csv )
  end
  logger.info "dumped #{analyzer.good_data.size} rows to #{options['data-csv']}"
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,26 @@
1
+ require 'readorder/command'
2
+ module Readorder
3
+ module Commands
4
+ #
5
+ # Run an analyzer to gather all the information and then output the
6
+ # filenames to stdout or to the output file
7
+ #
8
+ class Sort < ::Readorder::Command
9
# Collect the data, log the summary report, then write each filename to the
# output in physical order when get_physical? is true, in inode order
# otherwise.
def run
  analyzer.collect_data
  analyzer.log_summary_report
  ordered =
    if get_physical? then
      logger.info "using physical order"
      analyzer.physical_order
    else
      logger.info "using inode order"
      analyzer.inode_order
    end
  ordered.values.each { |datum| output.puts datum.filename }
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,234 @@
1
+ require 'stringio'
2
+ module Readorder
3
+ module Commands
4
+ #
5
+ # Test reading all the contents of a subset of the files and report summary
6
+ # information on how long it takes to read the files given different
7
+ # reading orders.
8
+ #
9
+ class Test < ::Readorder::Command
10
+
11
+ #
12
+ # call-seq:
13
+ # test.before -> nil
14
+ #
15
+ # Part of the Command lifecycle. In the Test command this makes sure we
16
+ # are on a Linux machine and running as root.
17
+ #
18
def before
  # Runs the parent's before hook, then raises Error unless
  # Datum.is_linux? is true and the effective uid is 0.
  super
  unless Datum.is_linux? then
    raise Error, "Only able to perform testing on linux.  I know how to dump the file system cache there."
  end
  if Process.euid != 0 then
    raise Error, "Must be root to perform testing."
  end
end
27
+
28
+ #
29
+ # call-seq:
30
+ # test.first_of( Filelist ) -> Filelist
31
+ #
32
+ # Use the *percentage* option to take the first *percentage* of the input
33
+ # Filelist and return a new Filelist object containing that subset.
34
+ #
35
def first_of( data )
  # data must respond to each_line; every line is stripped before use.
  percentage = options['percentage']
  logger.info "gathering the first #{percentage}% of the data"
  lines = []
  data.each_line { |l| lines << l.strip }

  # Number of lines to keep, rounded up.  Multiply before dividing so that
  # whole-number results are not pushed up by float error (100 lines at 7%
  # must be 7, not 8).
  keep = ( lines.size * percentage.to_f / 100.0 ).ceil
  keep = 0 if keep < 0

  # first( keep ) returns exactly keep lines; an inclusive 0..keep range
  # would return one line too many.
  subset = lines.first( keep )
  return Filelist.new( StringIO.new( subset.join("\n") ) )
end
44
+
45
+ #
46
+ # call-seq:
47
+ # test.sample_from( Filelist ) -> Filelist
48
+ #
49
+ # Use the *percentage* option to take a random subsampling of data from
50
+ # the input Filelist and return a new Filelist object containing that
51
+ # subset.
52
+ #
53
def sample_from( data )
  # data must respond to each_line.  Each line is kept independently with
  # probability percentage / 100, so the sample size varies between runs.
  logger.info "sampling a random #{options['percentage']}% of the data"
  # to_f accepts a String as well as a number for the option, the same way
  # first_of does
  fraction = options['percentage'].to_f / 100.0
  samples = []
  total = 0
  data.each_line do |l|
    total += 1
    samples << l.strip if rand < fraction
  end
  logger.info "sampled #{samples.size} of #{total}"
  return Filelist.new( StringIO.new( samples.join("\n") ) )
end
67
+
68
+ #
69
+ # call-seq:
70
+ # test.run -> nil
71
+ #
72
+ # Part of the Command lifecycle.
73
+ #
74
+ def run
75
+ test_using_random_sample
76
+ test_using_first_of
77
+ end
78
+
79
+ #
80
+ # call-seq:
81
+ # test.test_using_random_sample
82
+ #
83
+ # Run the full test using a random subsample of the original Filelist
84
+ #
85
+ def test_using_random_sample
86
+ @filelist = nil
87
+ sublist = sample_from( self.filelist )
88
+ results = test_using_sublist( sublist )
89
+ output.puts "Test Using Random Sample".center(72)
90
+ output.puts "=" * 72
91
+ report_results( results )
92
+
93
+ end
94
+
95
+ #
96
+ # call-seq:
97
+ # test.test_using_first_of
98
+ #
99
+ # Run the full test using the first *percentage* of the original
100
+ # Filelist
101
+ #
102
+ def test_using_first_of
103
+ @filelist = nil
104
+ sublist = first_of( self.filelist )
105
+ results = test_using_sublist( sublist )
106
+ output.puts "Test Using First Of".center(72)
107
+ output.puts "=" * 72
108
+ report_results( results )
109
+ end
110
+
111
+ #
112
+ # call-seq:
113
+ # test.test_using_sublist( Filelist ) -> Array of TimedValueMetric
114
+ #
115
+ # given a Filelist of messages run the whole test on them all
116
+ #
117
+ def test_using_sublist( sublist )
118
+ analyzer = Analyzer.new( sublist )
119
+ analyzer.collect_data
120
+ results = []
121
+
122
+ %w[ original_order inode_number first_physical_block_number ].each do |order|
123
+ logger.info "ordering #{analyzer.good_data.size} samples by #{order}"
124
+ tree = ::MultiRBTree.new
125
+ analyzer.good_data.each do |s|
126
+ rank = s.send( order )
127
+ tree[rank] = s
128
+ end
129
+ results << run_test( order, tree.values )
130
+ end
131
+ return results
132
+ end
133
+
134
+ #
135
+ # call-seq:
136
+ # test.report_results( results ) -> nil
137
+ #
138
+ # Write the report of the timings to output
139
+ #
140
+ def report_results( timings )
141
+ t = timings.first
142
+ output.puts
143
+ output.puts " Total files read : #{"%12d" % t.value_stats.count}"
144
+ output.puts " Total bytes read : #{"%12d" % t.value_stats.sum}"
145
+ output.puts " Minimum filesize : #{"%12d" % t.value_stats.min}"
146
+ output.puts " Average filesize : #{"%16.3f" % t.value_stats.mean}"
147
+ output.puts " Maximum filesize : #{"%12d" % t.value_stats.max}"
148
+ output.puts " Stddev of sizes : #{"%16.3f" % t.value_stats.stddev}"
149
+ output.puts
150
+
151
+ output.puts ["%28s" % "read order", "%20s" % "Elapsed time (sec)", "%22s" % "Read rate (bytes/sec)" ].join(" ")
152
+ output.puts "-" * 72
153
+ timings.each do |timing|
154
+ p = [ ]
155
+ p << "%28s" % timing.name
156
+ p << "%20.3f" % timing.timed_stats.sum
157
+ p << "%22.3f" % timing.rate
158
+ output.puts p.join(" ")
159
+ end
160
+ output.puts
161
+ end
162
+ #
163
+ #
164
+ # call-seq:
165
+ # test.run_test( 'original', [ Datum, Datum, ... ]) -> Hitimes::TimedValueMetric
166
+ #
167
+ # Loop over all the Datum instances in the array and read the contents of
168
+ # the file dumping them to /dev/null. Timings of this process are recorded
169
+ # and a Hitimes::TimedValueMetric is returned which holds the results.
170
+ #
171
def run_test( test_name, data )
  # test_name labels the returned metric; data is the ordered collection of
  # items handed one at a time to dump_to_dev_null.  Caches are dropped once
  # before the timed loop starts.
  logger.info "running #{test_name} test on #{data.size} files"
  self.drop_caches
  timer = ::Hitimes::TimedValueMetric.new( test_name )
  logger.info " begin test"
  data.each do |d|
    timer.start
    bytes = dump_to_dev_null( d )
    timer.stop( bytes )

    # Progress line every 10,000 files.  The count is read from timed_stats,
    # the same place the final summary below reads it from.
    processed = timer.timed_stats.count
    if processed % 10_000 == 0 then
      logger.info " processed #{processed} at #{"%0.3f" % timer.rate} bytes/sec"
    end
  end
  logger.info " end test"
  logger.info " processed #{timer.timed_stats.count} at #{"%0.3f" % timer.rate} bytes/sec"
  return timer
end
189
+
190
+ #
191
+ # call-seq:
192
+ # test.drop_caches -> nil
193
+ #
194
+ # Drop the caches on a linux filesystem.
195
+ #
196
+ # See proc(5) and /proc/sys/vm/drop_caches
197
+ #
198
+ def drop_caches
199
+ # old habits die hard
200
+ logger.info " dropping caches"
201
+ 3.times { %x[ /bin/sync ] }
202
+ File.open( "/proc/sys/vm/drop_caches", "w" ) do |f|
203
+ f.puts 3
204
+ end
205
+ end
206
+
207
+ #
208
+ # call-seq:
209
+ # test.dump_to_dev_null( Datum ) -> Integer
210
+ #
211
+ # Write the contents of the file info in Datum to /dev/null and return the
212
+ # number of bytes written.
213
+ #
214
def dump_to_dev_null( datum )
  # datum must respond to filename and stat.  The file is read with sysread
  # in chunks of stat.blksize bytes, 4096 when that is nil or not positive.
  bytes = 0
  File.open( "/dev/null", "w+" ) do |writer|
    File.open( datum.filename, "r") do |reader|
      chunk_size = datum.stat.blksize
      # a zero chunk size would make sysread return "" forever
      chunk_size = 4096 unless chunk_size && chunk_size > 0
      buf = String.new
      loop do
        begin
          bytes += writer.write( reader.sysread( chunk_size, buf ) )
        rescue EOFError
          # normal termination: the whole file has been read
          break
        rescue => e
          # still best effort, but a short read is no longer silent
          logger.warn "stopped reading #{datum.filename} after #{bytes} bytes: #{e}"
          break
        end
      end
    end
  end
  return bytes
end
232
+ end
233
+ end
234
+ end
@@ -0,0 +1,181 @@
1
+ require 'rbconfig'
2
+ require 'pathname'
3
+ module Readorder
4
+ #
5
+ # All the block, inode and stat information about one file
6
+ #
7
+ class Datum
8
+
9
+ # The fully qualified path of the file
10
+ attr_reader :filename
11
+
12
+ # The inode number of the file
13
+ attr_reader :inode_number
14
+
15
+ # The physical block number of the first disc block of the file. This piece
16
+ # of data may not be gathered. This will be nil if that is the case
17
+ attr_reader :first_physical_block_number
18
+
19
+ # if there is a reason this file is not eligible for analysis this explains
20
+ # why
21
+ attr_reader :error_reason
22
+
23
+ # File::Stat of the file
24
+ attr_reader :stat
25
+
26
+ # count of the number of physical disc blocks this file consumes. This is
27
+ # only gathered if the *first_physical_block_number* is also gathered.
28
+ attr_reader :physical_block_count
29
+
30
+ # the original order in which the Datum was collected
31
+ attr_accessor :original_order
32
+
33
+ # Check if we are running on linux. We use this to enable
34
+ # us to check the physical block id.
35
def self.is_linux?
  # Computed once and cached, including a false answer, which ||= would
  # recompute on every call.  RbConfig replaces the Config constant that
  # current Rubies no longer define.  Returns true or false.
  unless defined? @is_linux
    @is_linux = !!( ::RbConfig::CONFIG['host_os'] =~ /linux/i )
  end
  @is_linux
end
38
+
39
+ #
40
+ # call-seq:
41
+ # Datum.new( filename ) -> Datum
42
+ #
43
+ # Create a new Datum instance for the given filename
44
+ #
45
+ def initialize( filename )
46
+ @filename = ::File.expand_path( filename.strip )
47
+ @inode_number = nil
48
+ @first_physical_block_number = nil
49
+ @physical_block_count = 0
50
+ @error_reason = nil
51
+ @original_order = 0
52
+
53
+ @stat = nil
54
+ @valid = false
55
+ @collected = false
56
+ end
57
+
58
+ #
59
+ # call-seq:
60
+ # datum.size -> Integer
61
+ #
62
+ # The number of bytes the file consumes
63
+ #
64
+ def size
65
+ @stat.size
66
+ end
67
+
68
+ #
69
+ # call-seq:
70
+ # datum.logger -> Logger
71
+ #
72
+ # The Logger for the instance
73
+ #
74
+ def logger
75
+ ::Logging::Logger[self]
76
+ end
77
+
78
+ #
79
+ # :call-seq:
80
+ # datum.collect( get_physical = true ) -> true
81
+ #
82
+ # Collect all the information about the file we need.
83
+ # This includes:
84
+ #
85
+ # * making sure we have a valid file, this means the file exists
86
+ # and is non-zero in size
87
+ # * getting the inode number of the file
88
+ # * getting the physical block number of the first block of the file
89
+ # * getting the device of the file
90
+ #
91
+ # If false is passed in, then the physical block number is not
92
+ # collected.
93
+ #
94
+ def collect( get_physical = true )
95
+ unless @collected then
96
+ begin
97
+ @stat = ::File.stat( @filename )
98
+ if not @stat.file? then
99
+ @valid = false
100
+ @error_reason = "Not a file"
101
+ elsif @stat.zero? then
102
+ @valid = false
103
+ @error_reason = "0 byte file"
104
+ else
105
+ @inode_number = @stat.ino
106
+ if get_physical then
107
+ @first_physical_block_number = self.find_first_physical_block_number
108
+ end
109
+ @valid = true
110
+ end
111
+ rescue => e
112
+ @error_reason = e.to_s
113
+ logger.warn e.to_s
114
+ @valid = false
115
+ ensure
116
+ @collected = true
117
+ end
118
+ end
119
+ return @collected
120
+ end
121
+
122
+ #
123
+ # call-seq:
124
+ # datum.valid?
125
+ #
126
+ # Does this Datum represent a collection of valid data
127
+ #
128
+ def valid?
129
+ @valid
130
+ end
131
+
132
+ ####
133
+ # Not part of the public api
134
+ protected
135
+
136
+ # find the mountpoint for this datum. We traverse up the Pathname
137
+ # of the datum until we get to a parent where #mountpoint? is true
138
+ #
139
+ =begin
140
+ def find_mountpoint
141
+ p = Pathname.new( @filename ).parent
142
+ until p.mountpoint? do
143
+ p = p.parent
144
+ end
145
+ return p.to_s
146
+ end
147
+ =end
148
+
149
+ #
150
+ # call-seq:
151
+ # datum.find_first_physical_block_number -> Integer
152
+ #
153
+ # find the first physical block number, this only applies to linux
154
+ # machines.
155
+ #
156
+ # This is only called within the context of the #collect method
157
+ #
158
+ def find_first_physical_block_number
159
+ return nil unless Datum.is_linux?
160
+
161
+ first_block_num = 0
162
+ File.open( @filename ) do |f|
163
+ @stat.blocks.times do |i|
164
+
165
+ j = [i].pack("i")
166
+ # FIBMAP = 0x00000001
167
+ f.ioctl( 0x00000001, j )
168
+ block_id = j.unpack("i")[0]
169
+
170
+ if block_id > 0 then
171
+ first_block_num = block_id if block_id < first_block_num || first_block_num == 0
172
+ @physical_block_count += 1
173
+ end
174
+
175
+ end
176
+ end
177
+ return first_block_num
178
+
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,61 @@
1
+ module Readorder
2
+ #
3
+ # An iterator over the contents of a bunch of files or IO objects
4
+ # depending on the initializer.
5
+ #
6
+ class Filelist
7
+ class Error < ::Readorder::Error; end
8
+
9
+ def initialize( sources = [] )
10
+ @sources = [ sources ].flatten
11
+ @current_source = nil
12
+ @sources.each do |s|
13
+ case s
14
+ when String
15
+ raise Error, "#{s} does not exist" unless File.exist?( s )
16
+ raise Error, "#{s} is not readable" unless File.readable?( s )
17
+ else
18
+ [ :gets, :close ].each do |meth|
19
+ raise Error, "#{s.inspect} does not respond to '#{meth}'" unless s.respond_to? meth
20
+ end
21
+ end
22
+ end
23
+ end
24
+
25
# The source currently being read.  When none is active the next entry is
# taken off @sources: a String is opened as a file, anything else (nil
# included) is used as it is.
def current_source
  return @current_source if @current_source
  upcoming = @sources.shift
  @current_source = ( String === upcoming ) ? File.open( upcoming ) : upcoming
  return @current_source
end
38
+
39
+ # return the next line from the sources, opening a new source if
40
+ # need be
41
def gets
  # Walk the sources in order.  An exhausted source is closed (unless it is
  # $stdin) and dropped so the next one is picked up; nil once none remain.
  while self.current_source do
    line = self.current_source.gets
    return line if line

    @current_source.close unless @current_source == $stdin
    @current_source = nil
  end
  return nil
end
51
+
52
+ #
53
+ # Iterator yielding the line returned, stopping on no more lines
54
+ #
55
def each_line
  # Without a block, return an Enumerator over the same lines instead of
  # failing at the yield.
  return enum_for( :each_line ) unless block_given?
  while line = self.gets do
    yield line
  end
end
60
+ end
61
+ end