readorder 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +4 -0
- data/LICENSE +13 -0
- data/README +158 -0
- data/bin/readorder +11 -0
- data/gemspec.rb +53 -0
- data/lib/readorder/analyzer.rb +170 -0
- data/lib/readorder/cli.rb +159 -0
- data/lib/readorder/command.rb +147 -0
- data/lib/readorder/commands/analyze.rb +17 -0
- data/lib/readorder/commands/sort.rb +26 -0
- data/lib/readorder/commands/test.rb +234 -0
- data/lib/readorder/datum.rb +181 -0
- data/lib/readorder/filelist.rb +61 -0
- data/lib/readorder/log.rb +58 -0
- data/lib/readorder/paths.rb +69 -0
- data/lib/readorder/runner.rb +48 -0
- data/lib/readorder/version.rb +30 -0
- data/lib/readorder.rb +24 -0
- data/spec/analyzer_spec.rb +51 -0
- data/spec/command_spec.rb +37 -0
- data/spec/filelist_spec.rb +53 -0
- data/spec/log_spec.rb +13 -0
- data/spec/paths_spec.rb +45 -0
- data/spec/runner_spec.rb +46 -0
- data/spec/spec_helper.rb +57 -0
- data/spec/version_spec.rb +16 -0
- data/tasks/announce.rake +39 -0
- data/tasks/config.rb +107 -0
- data/tasks/distribution.rake +38 -0
- data/tasks/documentation.rake +32 -0
- data/tasks/rspec.rake +29 -0
- data/tasks/rubyforge.rake +51 -0
- data/tasks/utils.rb +80 -0
- metadata +161 -0
data/HISTORY
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2009, Jeremy Hinegardner
|
2
|
+
|
3
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
4
|
+
purpose with or without fee is hereby granted, provided that the above
|
5
|
+
copyright notice and this permission notice appear in all copies.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
10
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
12
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
13
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
== Readorder
|
2
|
+
|
3
|
+
* Homepage[http://copiousfreetime.rubyforge.org/readorder/]
|
4
|
+
* {Rubyforge Project}[http://rubyforge.org/projects/copiousfreetime/]
|
5
|
+
* email jeremy at copiousfreetime dot org
|
6
|
+
* git clone git://github.com/copiousfreetime/readorder.git
|
7
|
+
|
8
|
+
== DESCRIPTION
|
9
|
+
|
10
|
+
Readorder orders a list of files into a more effective read order.
|
11
|
+
|
12
|
+
You would possibly want to use readorder in a case where you know ahead
|
13
|
+
of time that you have a large quantity of files on disc to process. You
|
14
|
+
can give it the list of those files and it will report back to you the
|
15
|
+
order in which you should process them to make most effective use of
|
16
|
+
your disc I/O.
|
17
|
+
|
18
|
+
Given a list of filenames, either on the command line or via stdin,
|
19
|
+
readorder will output the filenames in an order that should increase
|
20
|
+
the I/O throughput when the files corresponding to the filenames are
|
21
|
+
read off of disc.
|
22
|
+
|
23
|
+
The output order of the filenames can either be in inode order or
|
24
|
+
physical disc block order. This is dependent upon operating system
|
25
|
+
support and permission level of the user running readorder.
|
26
|
+
|
27
|
+
== COMMANDS
|
28
|
+
|
29
|
+
=== Sort
|
30
|
+
|
31
|
+
Given a list of filenames, either on the command line or via stdin,
|
32
|
+
output the filenames in an order that should increase the I/O
|
33
|
+
throughput when the contents of the files are read from disc.
|
34
|
+
|
35
|
+
==== Synopsis
|
36
|
+
|
37
|
+
readorder sort [filelist*] [options]+
|
38
|
+
|
39
|
+
filelist (-1 ~> filelist=#<IO:0x1277e4>)
|
40
|
+
The files containing filenames
|
41
|
+
--inode
|
42
|
+
Only use inode order do not attempt physical block order
|
43
|
+
--log-level=log-level (0 ~> log-level=info)
|
44
|
+
The verbosity of logging, one of [ debug, info, warn, error, fatal ]
|
45
|
+
--log-file=log-file (0 ~> log-file)
|
46
|
+
Log to this file instead of stderr
|
47
|
+
--output=output (0 ~> output)
|
48
|
+
Where to write the output
|
49
|
+
--error-filelist=error-filelist (0 ~> error-filelist)
|
50
|
+
Write all the files from the filelist that had errors to this file
|
51
|
+
--help, -h
|
52
|
+
|
53
|
+
==== Example Output
|
54
|
+
|
55
|
+
=== Analyze
|
56
|
+
|
57
|
+
Take the list of filenames and output an analysis of the volume of
|
58
|
+
data in those files.
|
59
|
+
|
60
|
+
==== Synopsis
|
61
|
+
|
62
|
+
readorder analyze [filelist*] [options]+
|
63
|
+
|
64
|
+
filelist (-1 ~> filelist=#<IO:0x1277e4>)
|
65
|
+
The files containing filenames
|
66
|
+
--log-level=log-level (0 ~> log-level=info)
|
67
|
+
The verbosity of logging, one of [ debug, info, warn, error, fatal ]
|
68
|
+
--log-file=log-file (0 ~> log-file)
|
69
|
+
Log to this file instead of stderr
|
70
|
+
--output=output (0 ~> output)
|
71
|
+
Where to write the output
|
72
|
+
--error-filelist=error-filelist (0 ~> error-filelist)
|
73
|
+
Write all the files from the filelist that had errors to this file
|
74
|
+
--data-csv=data-csv (0 ~> data-csv)
|
75
|
+
Write the raw data collected to this csv file
|
76
|
+
--help, -h
|
77
|
+
|
78
|
+
==== Example Output
|
79
|
+
|
80
|
+
=== Test
|
81
|
+
|
82
|
+
Given a list of filenames, either on the command line or via stdin,
|
83
|
+
take a random subsample of them and read all the contents of those
|
84
|
+
files in different orders.
|
85
|
+
|
86
|
+
* in initial given order
|
87
|
+
* in inode order
|
88
|
+
* in physical block order
|
89
|
+
|
90
|
+
Output a report of the various times taken to read the files.
|
91
|
+
|
92
|
+
This command requires elevated privileges to run. It will purge your disc
|
93
|
+
cache multiple times while running, and will spike the I/O of your machine.
|
94
|
+
Run with care.
|
95
|
+
|
96
|
+
==== Synopsis
|
97
|
+
|
98
|
+
readorder test [filelist*] [options]+
|
99
|
+
|
100
|
+
filelist (-1 ~> filelist=#<IO:0x1277e4>)
|
101
|
+
The files containing filenames
|
102
|
+
--percentage=percentage (0 ~> int(percentage))
|
103
|
+
What random percentage of input files to select
|
104
|
+
--log-level=log-level (0 ~> log-level=info)
|
105
|
+
The verbosity of logging, one of [ debug, info, warn, error, fatal ]
|
106
|
+
--log-file=log-file (0 ~> log-file)
|
107
|
+
Log to this file instead of stderr
|
108
|
+
--error-filelist=error-filelist (0 ~> error-filelist)
|
109
|
+
Write all the files from the filelist that had errors to this file
|
110
|
+
--help, -h
|
111
|
+
|
112
|
+
==== Example result
|
113
|
+
|
114
|
+
|
115
|
+
Test Using First Of
|
116
|
+
========================================================================
|
117
|
+
|
118
|
+
Total files read : 8052
|
119
|
+
Total bytes read : 6575824
|
120
|
+
Minimum filesize : 637
|
121
|
+
Average filesize : 816.670
|
122
|
+
Maximum filesize : 1393
|
123
|
+
Stddev of sizes : 86.936
|
124
|
+
|
125
|
+
read order Elapsed time (sec) Read rate (bytes/sec)
|
126
|
+
------------------------------------------------------------------------
|
127
|
+
original_order 352.403 18659.944
|
128
|
+
inode_number 53.606 122669.175
|
129
|
+
first_physical_block_number 47.520 138379.024
|
130
|
+
|
131
|
+
This is the output of a <tt>readorder test</tt> command run on a directory on
|
132
|
+
a ReiserFS filesystem containing 805,038 files, constituting 657,543,700 bytes
|
133
|
+
of data. A sample of 1% of the files was used for the test.
|
134
|
+
|
135
|
+
If we process them in their original order we can see that this will
|
136
|
+
potentially take us 9.78 hours. If we process them in physical block number
|
137
|
+
order that is reduced to 1.31 hours.
|
138
|
+
|
139
|
+
== CREDITS
|
140
|
+
|
141
|
+
* Linux System Programming by Robert Love
|
142
|
+
* {readahead project}[https://fedorahosted.org/readahead/]
|
143
|
+
|
144
|
+
== ISC LICENSE
|
145
|
+
|
146
|
+
Copyright (c) 2009, Jeremy Hinegardner
|
147
|
+
|
148
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
149
|
+
purpose with or without fee is hereby granted, provided that the above
|
150
|
+
copyright notice and this permission notice appear in all copies.
|
151
|
+
|
152
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
153
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
154
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
155
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
156
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
157
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
158
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/bin/readorder
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2009
|
5
|
+
# All rights reserved. See LICENSE and/or COPYING for details.
|
6
|
+
#++
|
7
|
+
|
8
|
+
# Put the gem's own lib directory (a sibling of bin/) first on the load path
# so the library next to this script is the one that gets required.
lib_dir = File.expand_path(File.join(File.dirname(__FILE__), "..", "lib"))
$LOAD_PATH.unshift lib_dir

require 'readorder'

# Hand the raw command line and environment to the CLI and run it.
::Readorder::Cli.new( ARGV, ENV ).run
|
data/gemspec.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'readorder/version'
|
3
|
+
require 'tasks/config'
|
4
|
+
|
5
|
+
# Gem specification assembled from the named Configuration sections
# ('project', 'packaging' and the optional 'extension', 'rdoc', 'testing'
# and 'rubyforge' sections).
Readorder::GEM_SPEC = Gem::Specification.new do |spec|
  project = Configuration.for('project')

  spec.name        = project.name
  spec.version     = Readorder::VERSION
  spec.author      = project.author
  spec.email       = project.email
  spec.homepage    = project.homepage
  spec.summary     = project.summary
  spec.description = project.description
  spec.platform    = Gem::Platform::RUBY

  packaging = Configuration.for('packaging')
  spec.files       = packaging.files.all
  spec.executables = packaging.files.bin.map { |path| File.basename(path) }

  # Runtime dependencies, registered in this order.
  [
    [ "configuration", "~> 0.0.5" ],
    [ "rbtree",        "~> 0.2.1" ],
    [ "main",          "~> 2.8.3" ],
    [ "logging",       "~> 1.1.4" ],
    [ "hitimes",       "~> 1.0.1" ]
  ].each do |gem_name, requirement|
    spec.add_dependency( gem_name, requirement )
  end

  spec.add_development_dependency( "rake", "~> 0.8.3" )

  # Optional section: native extension configs.
  extension = Configuration.for_if_exist?("extension")
  if extension
    spec.extensions << extension.configs
    spec.extensions.flatten!
    spec.require_paths << "ext"
  end

  # Optional section: rdoc settings; without it rdoc generation is disabled.
  rdoc = Configuration.for_if_exist?('rdoc')
  if rdoc
    spec.has_rdoc         = true
    spec.extra_rdoc_files = packaging.files.rdoc
    spec.rdoc_options     = rdoc.options + [ "--main", rdoc.main_page ]
  else
    spec.has_rdoc = false
  end

  # Optional section: test files.
  testing = Configuration.for_if_exist?('testing')
  spec.test_files = testing.files if testing

  # Optional section: rubyforge project name.
  rubyforge = Configuration.for_if_exist?('rubyforge')
  spec.rubyforge_project = rubyforge.project if rubyforge
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
require 'hitimes'
|
2
|
+
require 'readorder/datum'
|
3
|
+
require 'rbtree'
|
4
|
+
|
5
|
+
module Readorder
|
6
|
+
#
|
7
|
+
# Use the given Filelist and traverse all of its files, collecting the
|
8
|
+
# appropriate Datum instances
|
9
|
+
#
|
10
|
+
class Analyzer
|
11
|
+
# an Array of Datum instances for files that cannot be processed
|
12
|
+
attr_accessor :bad_data
|
13
|
+
|
14
|
+
# an Array of Datum instances in the order they were processed
|
15
|
+
attr_accessor :good_data
|
16
|
+
|
17
|
+
# an RBTree of Datum instances of those files that were analyzed
|
18
|
+
# in order by physical disc block number. This only has items if
|
19
|
+
# the physical block number was obtained. It is empty otherwise
|
20
|
+
attr_accessor :physical_order
|
21
|
+
|
22
|
+
# an RBTree of Datum instances of those files that were analyzed
|
23
|
+
# in order by inode
|
24
|
+
attr_accessor :inode_order
|
25
|
+
|
26
|
+
#
|
27
|
+
# Initialize the Analyzer with the Filelist object and whether or
|
28
|
+
# not to gather physical block information.
|
29
|
+
#
|
30
|
+
def initialize( filelist, get_physical = true )
  # Inputs: the source of filenames and whether physical block data is wanted.
  @filelist     = filelist
  @get_physical = get_physical

  # Result collections, all empty until collect_data runs.
  @bad_data, @good_data = [], []
  @physical_order = ::MultiRBTree.new
  @inode_order    = ::MultiRBTree.new

  # Metrics fed while collecting: one for file sizes, one for timing.
  @size_metric = ::Hitimes::ValueMetric.new( 'size' )
  @time_metric = ::Hitimes::TimedMetric.new( 'time' )
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# call-seq:
|
43
|
+
# analyzer.logger -> Logger
|
44
|
+
#
|
45
|
+
# return the Logger instance for the Analyzer
|
46
|
+
#
|
47
|
+
def logger
  # Look the logger up by this object each time; nothing is cached here.
  ::Logging::Logger[ self ]
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# call-seq:
|
53
|
+
# analyzer.collect_data -> nil
|
54
|
+
#
|
55
|
+
# Run data collections over the Filelist and store the results into
|
56
|
+
# *good_data* or *bad_data* as appropriate. A status message is written to the
|
57
|
+
# log every 10,000 files processed
|
58
|
+
#
|
59
|
+
def collect_data
  logger.info "Begin data collection"
  position = 0
  @filelist.each_line do |line|
    # Each file is processed inside the timed metric so count/rate track it.
    @time_metric.measure do
      datum = Datum.new( line )
      datum.collect( @get_physical )
      datum.original_order = position
      if datum.valid?
        @good_data << datum
        @size_metric.measure( datum.stat.size )
        @inode_order[datum.inode_number] = datum
        @physical_order[datum.first_physical_block_number] = datum if @get_physical
      else
        @bad_data << datum
      end
    end

    # Periodic progress message, once per 10,000 measured files.
    if (@time_metric.count % 10_000).zero?
      logger.info " processed #{@time_metric.count} at #{format("%0.3f", @time_metric.rate)} files/sec"
    end
    position += 1
  end

  # Final totals.
  logger.info " processed #{@time_metric.count} at #{format("%0.3f", @time_metric.rate)} files/sec"
  logger.info " yielded #{@good_data.size} data points"
  logger.info "End data collection"
  nil
end
|
90
|
+
|
91
|
+
#
|
92
|
+
# call-seq:
|
93
|
+
# analyzer.log_summary_report -> nil
|
94
|
+
#
|
95
|
+
# Write the summary report to the #logger
|
96
|
+
#
|
97
|
+
def log_summary_report
  # One log entry per line of the report text.
  summary_report.split("\n").each { |line| logger.info line }
end
|
102
|
+
|
103
|
+
#
|
104
|
+
# call-seq:
|
105
|
+
# analyzer.summary_report -> String
|
106
|
+
#
|
107
|
+
# Generate a summary report of how long it took to analyze the files and the
|
108
|
+
# filesizes found. return it as a String
|
109
|
+
#
|
110
|
+
def summary_report
  # Build the report as an Array of lines rather than through StringIO:
  # this file never requires 'stringio', so the method would otherwise only
  # work when some other library happened to load it first.
  report = [
    "Files analyzed : #{"%12d" % @time_metric.count}",
    "Elapsed time : #{"%12d" % @time_metric.duration} seconds",
    "Collection Rate : #{"%16.3f" % @time_metric.rate} files/sec",
    "Good files : #{"%12d" % @good_data.size}",
    " average size : #{"%16.3f" % @size_metric.mean} bytes",
    " minimum size : #{"%16.3f" % @size_metric.min} bytes",
    " maximum size : #{"%16.3f" % @size_metric.max} bytes",
    " sum of sizes : #{"%12d" % @size_metric.sum} bytes",
    "Bad files : #{"%12d" % @bad_data.size}"
  ]

  # Every line, including the last one, is newline terminated.
  report.map { |line| "#{line}\n" }.join
end
|
123
|
+
|
124
|
+
#
|
125
|
+
# call-seq:
|
126
|
+
# analyzer.dump_bad_data_to( IO ) -> nil
|
127
|
+
#
|
128
|
+
# write a csv to the _IO_ object passed in. The format is:
|
129
|
+
#
|
130
|
+
# error reason,filename
|
131
|
+
#
|
132
|
+
# If there are no bad Datum instances then do not write anything.
|
133
|
+
#
|
134
|
+
def dump_bad_data_to( io )
  # Nothing at all is written, not even the header, when there are no failures.
  return nil unless bad_data.size > 0

  io.puts "error_reason,filename"
  bad_data.each { |datum| io.puts "#{datum.error_reason},#{datum.filename}" }
  nil
end
|
143
|
+
|
144
|
+
|
145
|
+
#
|
146
|
+
# call-seq:
|
147
|
+
# analyzer.dump_good_data_to( IO ) -> nil
|
148
|
+
#
|
149
|
+
# Write a csv to the _IO_ object passed in. The format is:
|
150
|
+
#
|
151
|
+
# filename,size,inode_number,physical_block_count,first_physical_block_number
|
152
|
+
#
|
153
|
+
# The last two fields *physical_block_count* and *first_physical_block_number* are
|
154
|
+
# only written if the analyzer was able to gather physical block information
|
155
|
+
#
|
156
|
+
def dump_good_data_to( io )
  # The two physical block columns are only present when the analyzer was
  # asked to gather physical block information.
  fields = %w[ filename size inode_number ]
  fields += %w[ physical_block_count first_physical_block_number ] if @get_physical

  io.puts fields.join(",")
  good_data.each do |datum|
    # Each header name doubles as the reader method called on the Datum.
    # Distinct names here avoid the block parameter shadowing the row local.
    values = fields.collect { |field| datum.send( field ) }
    io.puts values.join(",")
  end

  # Return nil as the call-seq documents, the same as dump_bad_data_to does,
  # instead of leaking the good_data collection to the caller.
  nil
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'main'
|
2
|
+
require 'readorder/runner'
|
3
|
+
|
4
|
+
module Readorder
|
5
|
+
Cli = Main.create do
  author "Copyright 2009 (c) Jeremy Hinegardner"
  version ::Readorder::VERSION

  description <<-txt
Readorder orders a list of files into a more efficient read order.

Given a list of filenames, either on the command line or via stdin,
output the filenames in an order that should increase the I/O
throughput when the contents files are read from disc.
  txt

  # The top-level run block, outside of any mode, only shows the help text.
  run { help! }

  ## --- Modes ---

  mode(:sort) do
    description <<-txt
Given a list of filenames, either on the command line or via stdin,
output the filenames in an order that should increase the I/O
throughput when the contents files are read from disc.
    txt

    option('inode') do
      description "Only use inode order do not attempt physical block order"
      cast :boolean
    end

    mixin :option_log_level
    mixin :option_log_file
    mixin :argument_filelist
    mixin :option_output
    mixin :option_error_filelist

    run { Cli.run_command_with_params( 'sort', params ) }
  end

  mode(:analyze) do
    description <<-txt
Take the list of filenames and output an analysis of the volume of
data in those files.
    txt

    mixin :option_log_level
    mixin :option_log_file
    mixin :argument_filelist
    mixin :option_output
    mixin :option_error_filelist

    option('data-csv') do
      description "Write the raw data collected to this csv file"
      argument :required
      # Only the directory that would contain the file has to exist.
      validate { |path| File.directory?( File.dirname( File.expand_path( path ) ) ) }
    end

    run { Cli.run_command_with_params( 'analyze', params ) }
  end

  mode(:test) do
    description <<-txt
Give a list of filenames, either on the commandline or via stdin,
take a random subsample of them and read all the contents of those
files in different orders.

1) in initial given order
2) in inode order
3) in physical block order

Output a report of the various times take to read the files.

This command requires elevated priveleges to run and will spike the
I/O of your machine. Run with care.
    txt

    option(:percentage) do
      description "What random percentage of input files to select"
      argument :required
      default "10"
      # Accept anything Float() can parse that lies in the range (0, 100].
      validate do |raw|
        pct = Float(raw)
        (pct > 0) && (pct <= 100)
      end
      cast :float
    end

    mixin :option_log_level
    mixin :option_log_file
    mixin :option_output
    mixin :argument_filelist
    mixin :option_error_filelist

    run { Cli.run_command_with_params( 'test', params ) }
  end

  ## --- Mixins ---

  # Zero or more filelist arguments; falls back to stdin when none are given.
  mixin :argument_filelist do
    argument('filelist') do
      description "The files containing filenames"
      arity '*'
      default [ $stdin ]
      required false
    end
  end

  mixin :option_log_level do
    option('log-level') do
      description "The verbosity of logging, one of [ #{::Logging::LNAMES.map {|l| l.downcase }.join(', ')} ]"
      argument :required
      default 'info'
      validate { |level| %w[ debug info warn error fatal off ].include?( level.downcase ) }
    end
  end

  # The three path options below share one rule: the directory that would
  # contain the named file must already exist.
  mixin :option_log_file do
    option('log-file') do
      description "Log to this file instead of stderr"
      argument :required
      validate { |path| File.directory?( File.dirname( File.expand_path( path ) ) ) }
    end
  end

  mixin :option_output do
    option('output') do
      description "Where to write the output"
      argument :required
      validate { |path| File.directory?( File.dirname( File.expand_path( path ) ) ) }
    end
  end

  mixin :option_error_filelist do
    option('error-filelist') do
      description "Write all the files from the filelist that had errors to this file"
      argument :required
      validate { |path| File.directory?( File.dirname( File.expand_path( path ) ) ) }
    end
  end
end
|
141
|
+
|
142
|
+
|
143
|
+
#
|
144
|
+
# Convert the Parameters::List that Main hands to a run block into a Hash of plain values
|
145
|
+
#
|
146
|
+
#
|
147
|
+
def Cli.params_to_hash( params )
  hash = params.to_hash
  # Iterate over a snapshot of the keys, since the entries are replaced in place.
  hash.keys.each do |name|
    values = hash[name].values
    # Zero or one value collapses to that value (nil when there is none);
    # two or more values stay as the list.
    hash[name] = values.size <= 1 ? values.first : values
  end
  hash
end
|
155
|
+
|
156
|
+
def Cli.run_command_with_params( command, params )
  # Flatten the parsed parameters to a Hash, then let a Runner built from
  # them execute the named command.
  options = Cli.params_to_hash( params )
  runner  = ::Readorder::Runner.new( options )
  runner.run( command )
end
|
159
|
+
end
|