readorder 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +9 -0
- data/gemspec.rb +2 -2
- data/lib/readorder.rb +7 -0
- data/lib/readorder/analyzer.rb +45 -47
- data/lib/readorder/cli.rb +12 -0
- data/lib/readorder/command.rb +31 -8
- data/lib/readorder/commands/sort.rb +10 -7
- data/lib/readorder/datum.rb +32 -1
- data/lib/readorder/results.rb +224 -0
- data/lib/readorder/version.rb +1 -1
- data/spec/analyzer_spec.rb +36 -11
- data/tasks/distribution.rake +1 -1
- metadata +7 -5
data/HISTORY
CHANGED
@@ -1,4 +1,13 @@
|
|
1
1
|
= Changelog
|
2
|
+
|
3
|
+
== Version 2.0.0 - 2009-08-24
|
4
|
+
|
5
|
+
=== Enhancements
|
6
|
+
|
7
|
+
* complete rewrite of internal file sorting and temporary storage enabling
|
8
|
+
the 'readordering' of large lists of files.
|
9
|
+
* switch to amalgalite instead of rbtree for sorting and storage
|
10
|
+
|
2
11
|
== Version 1.0.0
|
3
12
|
|
4
13
|
* Initial public release
|
data/gemspec.rb
CHANGED
@@ -21,10 +21,10 @@ Readorder::GEM_SPEC = Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
# add dependencies here
|
23
23
|
spec.add_dependency("configuration", "~> 0.0.5")
|
24
|
-
spec.add_dependency("
|
24
|
+
spec.add_dependency("amalgalite", "~> 0.11.0")
|
25
25
|
spec.add_dependency("main", "~> 2.8.3")
|
26
26
|
spec.add_dependency("logging", "~> 1.1.4")
|
27
|
-
spec.add_dependency("hitimes", "~> 1.0.
|
27
|
+
spec.add_dependency("hitimes", "~> 1.0.4")
|
28
28
|
|
29
29
|
spec.add_development_dependency( "rake", "~> 0.8.3")
|
30
30
|
|
data/lib/readorder.rb
CHANGED
@@ -16,6 +16,13 @@ end
|
|
16
16
|
|
17
17
|
require 'rubygems'
|
18
18
|
require 'logging'
|
19
|
+
|
20
|
+
# require amalgalite explicitly before hitimes because of
|
21
|
+
# using flat namespace on OSX
|
22
|
+
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/344658
|
23
|
+
require 'amalgalite'
|
24
|
+
require 'hitimes'
|
25
|
+
|
19
26
|
require 'readorder/version'
|
20
27
|
require 'readorder/paths'
|
21
28
|
require 'readorder/cli'
|
data/lib/readorder/analyzer.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
|
-
require 'hitimes'
|
2
1
|
require 'readorder/datum'
|
3
|
-
require '
|
2
|
+
require 'readorder/results'
|
4
3
|
|
5
4
|
module Readorder
|
6
5
|
#
|
@@ -8,34 +7,27 @@ module Readorder
|
|
8
7
|
# appropriate Datum instances
|
9
8
|
#
|
10
9
|
class Analyzer
|
11
|
-
#
|
12
|
-
attr_accessor :
|
10
|
+
# number of bad_data items encountered
|
11
|
+
attr_accessor :bad_data_count
|
12
|
+
|
13
|
+
# number of good_data items encountered
|
14
|
+
attr_accessor :good_data_count
|
13
15
|
|
14
|
-
#
|
15
|
-
attr_accessor :
|
16
|
-
|
17
|
-
# an RBTree of Datum instances of those files that were analyzed
|
18
|
-
# in order by physical disc block number. This only has items if
|
19
|
-
# the physical block number was obtained. It is empty otherwise
|
20
|
-
attr_accessor :physical_order
|
21
|
-
|
22
|
-
# an RBTree of Datum instances of those files that were analyzed
|
23
|
-
# in order by inode
|
24
|
-
attr_accessor :inode_order
|
16
|
+
# The Results handler
|
17
|
+
attr_accessor :results
|
25
18
|
|
26
19
|
#
|
27
20
|
# Initialize the Analyzer with the Filelist object and whether or
|
28
21
|
# not to gather the physical block size.
|
29
22
|
#
|
30
|
-
def initialize( filelist, get_physical = true )
|
23
|
+
def initialize( filelist, results, get_physical = true )
|
31
24
|
@filelist = filelist
|
32
|
-
@bad_data = []
|
33
|
-
@good_data = []
|
34
|
-
@physical_order = ::MultiRBTree.new
|
35
|
-
@inode_order = ::MultiRBTree.new
|
36
25
|
@get_physical = get_physical
|
37
26
|
@size_metric = ::Hitimes::ValueMetric.new( 'size' )
|
38
27
|
@time_metric = ::Hitimes::TimedMetric.new( 'time' )
|
28
|
+
@results = results
|
29
|
+
@bad_data_count = 0
|
30
|
+
@good_data_count = 0
|
39
31
|
end
|
40
32
|
|
41
33
|
#
|
@@ -60,30 +52,35 @@ module Readorder
|
|
60
52
|
logger.info "Begin data collection"
|
61
53
|
original_order = 0
|
62
54
|
@filelist.each_line do |fname|
|
63
|
-
|
55
|
+
next if @results.has_datum_for_filename?( fname )
|
56
|
+
logger.debug " analyzing #{fname.strip}"
|
64
57
|
@time_metric.measure do
|
65
58
|
d = Datum.new( fname )
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@
|
71
|
-
|
72
|
-
if
|
73
|
-
@
|
59
|
+
begin
|
60
|
+
d.collect( @get_physical )
|
61
|
+
d.original_order = original_order
|
62
|
+
|
63
|
+
@results.add_datum( d )
|
64
|
+
|
65
|
+
if d.valid? then
|
66
|
+
@size_metric.measure d.stat.size
|
67
|
+
@good_data_count += 1
|
68
|
+
else
|
69
|
+
@bad_data_count += 1
|
74
70
|
end
|
75
|
-
|
76
|
-
|
71
|
+
rescue => e
|
72
|
+
logger.error "#{e} : #{d.to_hash.inspect}"
|
77
73
|
end
|
78
74
|
end
|
79
75
|
|
80
76
|
if @time_metric.count % 10_000 == 0 then
|
81
|
-
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec"
|
77
|
+
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec ( #{@good_data_count} good, #{@bad_data_count} bad )"
|
82
78
|
end
|
83
79
|
original_order += 1
|
84
80
|
end
|
81
|
+
@results.flush
|
85
82
|
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec"
|
86
|
-
logger.info " yielded #{@
|
83
|
+
logger.info " yielded #{@good_data_count} data points"
|
87
84
|
logger.info "End data collection"
|
88
85
|
nil
|
89
86
|
end
|
@@ -112,30 +109,30 @@ module Readorder
|
|
112
109
|
s.puts "Files analyzed : #{"%12d" % @time_metric.count}"
|
113
110
|
s.puts "Elapsed time : #{"%12d" % @time_metric.duration} seconds"
|
114
111
|
s.puts "Collection Rate : #{"%16.3f" % @time_metric.rate} files/sec"
|
115
|
-
s.puts "Good files : #{"%12d" % @
|
112
|
+
s.puts "Good files : #{"%12d" % @good_data_count}"
|
116
113
|
s.puts " average size : #{"%16.3f" % @size_metric.mean} bytes"
|
117
114
|
s.puts " minimum size : #{"%16.3f" % @size_metric.min} bytes"
|
118
115
|
s.puts " maximum size : #{"%16.3f" % @size_metric.max} bytes"
|
119
116
|
s.puts " sum of sizes : #{"%12d" % @size_metric.sum} bytes"
|
120
|
-
s.puts "Bad files : #{"%12d" % @
|
117
|
+
s.puts "Bad files : #{"%12d" % @bad_data_count}"
|
121
118
|
return s.string
|
122
119
|
end
|
123
120
|
|
124
121
|
#
|
125
122
|
# call-seq:
|
126
|
-
# analyzer.
|
123
|
+
# analyzer.dump_errors_to( IO ) -> nil
|
127
124
|
#
|
128
125
|
# write a csv to the _IO_ object passed in. The format is:
|
129
126
|
#
|
130
|
-
#
|
127
|
+
# error_reason,filename
|
131
128
|
#
|
132
129
|
# If there are no bad Datum instances then do not write anything.
|
133
130
|
#
|
134
|
-
def
|
135
|
-
if
|
131
|
+
def dump_errors_to( io )
|
132
|
+
if results.error_count > 0 then
|
136
133
|
io.puts "error_reason,filename"
|
137
|
-
|
138
|
-
io.puts "#{d
|
134
|
+
results.each_error do |d|
|
135
|
+
io.puts "#{d['error_reason']},#{d['filename']}"
|
139
136
|
end
|
140
137
|
end
|
141
138
|
nil
|
@@ -144,7 +141,7 @@ module Readorder
|
|
144
141
|
|
145
142
|
#
|
146
143
|
# call-seq:
|
147
|
-
# analyzer.
|
144
|
+
# analyzer.dump_valid_to( IO ) -> nil
|
148
145
|
#
|
149
146
|
# Write a csv to the _IO_ object passed in. The format is:
|
150
147
|
#
|
@@ -153,17 +150,18 @@ module Readorder
|
|
153
150
|
# The last two fields *physical_block_count* and *first_physical_block_number* are
|
154
151
|
# only written if the analyzer was able to gather physical block information
|
155
152
|
#
|
156
|
-
def
|
153
|
+
def dump_valid_to( io )
|
157
154
|
fields = %w[ filename size inode_number ]
|
155
|
+
by_field = 'inode_number'
|
158
156
|
if @get_physical then
|
159
157
|
fields << 'physical_block_count'
|
160
158
|
fields << 'first_physical_block_number'
|
159
|
+
by_field = 'first_physical_block_number'
|
161
160
|
end
|
162
|
-
|
163
161
|
io.puts fields.join(",")
|
164
|
-
|
165
|
-
|
166
|
-
|
162
|
+
results.each_valid_by_field( by_field ) do |d|
|
163
|
+
f = fields.collect { |f| d[f] }
|
164
|
+
io.puts f.join(",")
|
167
165
|
end
|
168
166
|
end
|
169
167
|
end
|
data/lib/readorder/cli.rb
CHANGED
@@ -36,6 +36,7 @@ module Readorder
|
|
36
36
|
mixin :argument_filelist
|
37
37
|
mixin :option_output
|
38
38
|
mixin :option_error_filelist
|
39
|
+
mixin :option_batch_size
|
39
40
|
|
40
41
|
run { Cli.run_command_with_params( 'sort', params ) }
|
41
42
|
}
|
@@ -51,6 +52,7 @@ module Readorder
|
|
51
52
|
mixin :argument_filelist
|
52
53
|
mixin :option_output
|
53
54
|
mixin :option_error_filelist
|
55
|
+
mixin :option_batch_size
|
54
56
|
|
55
57
|
option( 'data-csv' ) {
|
56
58
|
description "Write the raw data collected to this csv file"
|
@@ -91,6 +93,7 @@ module Readorder
|
|
91
93
|
mixin :option_output
|
92
94
|
mixin :argument_filelist
|
93
95
|
mixin :option_error_filelist
|
96
|
+
mixin :option_batch_size
|
94
97
|
|
95
98
|
run { Cli.run_command_with_params( 'test', params ) }
|
96
99
|
}
|
@@ -137,6 +140,15 @@ module Readorder
|
|
137
140
|
validate { |f| File.directory?( File.dirname(File.expand_path( f ) ) ) }
|
138
141
|
end
|
139
142
|
end
|
143
|
+
|
144
|
+
mixin :option_batch_size do
|
145
|
+
option('batch-size' ) do
|
146
|
+
description "The number of files to queue before writing them to the db for storage"
|
147
|
+
argument :required
|
148
|
+
default 10_000
|
149
|
+
cast :integer
|
150
|
+
end
|
151
|
+
end
|
140
152
|
}
|
141
153
|
|
142
154
|
|
data/lib/readorder/command.rb
CHANGED
@@ -32,6 +32,7 @@ module Readorder
|
|
32
32
|
@filelist = nil
|
33
33
|
@analyzer = nil
|
34
34
|
@output = nil
|
35
|
+
@delete_results = true
|
35
36
|
end
|
36
37
|
|
37
38
|
def filelist
|
@@ -47,7 +48,21 @@ module Readorder
|
|
47
48
|
end
|
48
49
|
|
49
50
|
def analyzer
|
50
|
-
@analyzer ||= Analyzer.new( filelist, self.get_physical? )
|
51
|
+
@analyzer ||= Analyzer.new( filelist, self.results, self.get_physical? )
|
52
|
+
end
|
53
|
+
|
54
|
+
def results_dbfile
|
55
|
+
if options['output'] then
|
56
|
+
output_dname = File.dirname( options['output'] )
|
57
|
+
output_bname = File.basename( options['output'], '.*' )
|
58
|
+
return File.join( output_dname, "#{output_bname}.db" )
|
59
|
+
else
|
60
|
+
return ":memory:"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def results
|
65
|
+
@results ||= Results.new( results_dbfile, options['batch-size'] )
|
51
66
|
end
|
52
67
|
|
53
68
|
def output
|
@@ -94,24 +109,32 @@ module Readorder
|
|
94
109
|
end
|
95
110
|
|
96
111
|
# called by the Runner if an error is encountered during the run method
|
97
|
-
def error()
|
112
|
+
def error()
|
113
|
+
results.close
|
114
|
+
end
|
98
115
|
|
99
116
|
# called by runner if a signal is hit
|
100
|
-
def shutdown()
|
117
|
+
def shutdown()
|
118
|
+
results.close
|
119
|
+
@delete_results = false
|
120
|
+
end
|
101
121
|
|
102
122
|
# called by runner when all is done
|
103
123
|
def after()
|
104
|
-
if output != $stdout then
|
105
|
-
output.close
|
106
|
-
end
|
107
124
|
if options['error-filelist'] then
|
108
|
-
if analyzer.
|
125
|
+
if analyzer.bad_data_count > 0 then
|
109
126
|
File.open( options['error-filelist'], "w+" ) do |f|
|
110
|
-
analyzer.
|
127
|
+
analyzer.dump_errors_to( f )
|
111
128
|
end
|
112
129
|
logger.info "wrote error filelist to #{options['error-filelist']}"
|
113
130
|
end
|
114
131
|
end
|
132
|
+
|
133
|
+
if output != $stdout then
|
134
|
+
output.close
|
135
|
+
results.close
|
136
|
+
File.unlink( results_dbfile ) if @delete_results
|
137
|
+
end
|
115
138
|
end
|
116
139
|
|
117
140
|
class << self
|
@@ -7,18 +7,21 @@ module Readorder
|
|
7
7
|
#
|
8
8
|
class Sort < ::Readorder::Command
|
9
9
|
def run
|
10
|
+
|
10
11
|
analyzer.collect_data
|
11
12
|
analyzer.log_summary_report
|
12
|
-
|
13
|
+
|
14
|
+
field = nil
|
13
15
|
if get_physical? then
|
14
|
-
logger.info "using physical order"
|
15
|
-
|
16
|
+
logger.info "using first physical block number order"
|
17
|
+
field = 'first_physical_block_number'
|
16
18
|
else
|
17
|
-
logger.info "using inode order"
|
18
|
-
|
19
|
+
logger.info "using inode number order"
|
20
|
+
field = 'inode_number'
|
19
21
|
end
|
20
|
-
|
21
|
-
|
22
|
+
|
23
|
+
analyzer.results.each_valid_by_field( field ) do |row|
|
24
|
+
output.puts row['filename']
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
data/lib/readorder/datum.rb
CHANGED
@@ -36,6 +36,10 @@ module Readorder
|
|
36
36
|
@is_linux ||= ::Config::CONFIG['host_os'] =~ /linux/i
|
37
37
|
end
|
38
38
|
|
39
|
+
def self.hash_keys
|
40
|
+
%w[ filename inode_number first_physical_block_number original_order size ]
|
41
|
+
end
|
42
|
+
|
39
43
|
#
|
40
44
|
# call-seq:
|
41
45
|
# Datum.new( filename ) -> Datum
|
@@ -49,12 +53,39 @@ module Readorder
|
|
49
53
|
@physical_block_count = 0
|
50
54
|
@error_reason = nil
|
51
55
|
@original_order = 0
|
56
|
+
@size = 0
|
52
57
|
|
53
58
|
@stat = nil
|
54
59
|
@valid = false
|
55
60
|
@collected = false
|
56
61
|
end
|
57
62
|
|
63
|
+
#
|
64
|
+
# call-seq:
|
65
|
+
# datum.to_csv
|
66
|
+
#
|
67
|
+
# return the datum as a CSV in the format:
|
68
|
+
#
|
69
|
+
# physical_id,inode_id,filename
|
70
|
+
#
|
71
|
+
def to_csv
|
72
|
+
"#{first_physical_block_number},#{inode_number},#{filename}"
|
73
|
+
end
|
74
|
+
|
75
|
+
#
|
76
|
+
# :call-seq:
|
77
|
+
# datum.to_hash -> Hash
|
78
|
+
#
|
79
|
+
# return all the items in the datum as a hash
|
80
|
+
#
|
81
|
+
def to_hash
|
82
|
+
h = {}
|
83
|
+
Datum.hash_keys.each do |k|
|
84
|
+
h[k] = self.send( k )
|
85
|
+
end
|
86
|
+
return h
|
87
|
+
end
|
88
|
+
|
58
89
|
#
|
59
90
|
# call-seq:
|
60
91
|
# datum.size -> Integer
|
@@ -62,7 +93,7 @@ module Readorder
|
|
62
93
|
# The number of bytes the file consumes
|
63
94
|
#
|
64
95
|
def size
|
65
|
-
@stat.size
|
96
|
+
@size ||= @stat.size
|
66
97
|
end
|
67
98
|
|
68
99
|
#
|
@@ -0,0 +1,224 @@
|
|
1
|
+
require 'amalgalite'
|
2
|
+
|
3
|
+
module Readorder
|
4
|
+
# Results persists the results from a readorder run
|
5
|
+
# The results are persisted in an SQlite3 database which allows for ordering
|
6
|
+
# the results by whatever means are wanted.
|
7
|
+
class Results
|
8
|
+
def self.create_table_sql
|
9
|
+
sql = <<-SQL
|
10
|
+
CREATE TABLE readorder_valid (
|
11
|
+
original_order INTEGER PRIMARY KEY NOT NULL,
|
12
|
+
size INTEGER NOT NULL,
|
13
|
+
inode_number INTEGER NOT NULL UNIQUE,
|
14
|
+
first_physical_block_number INTEGER UNIQUE,
|
15
|
+
physical_block_count INTEGER,
|
16
|
+
filename TEXT NOT NULL UNIQUE
|
17
|
+
);
|
18
|
+
|
19
|
+
CREATE TABLE readorder_errors (
|
20
|
+
original_order INTEGER PRIMARY KEY NOT NULL,
|
21
|
+
filename TEXT NOT NULL UNIQUE,
|
22
|
+
error_reason TEXT NOT NULL
|
23
|
+
);
|
24
|
+
SQL
|
25
|
+
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# :call-seq:
|
29
|
+
# Results.new( filename, 10_000 ) -> results
|
30
|
+
#
|
31
|
+
# Create a new Results object with a batch size. The batch size is how many
|
32
|
+
# items to queue up to run in a single transaction into the sqlite database.
|
33
|
+
#
|
34
|
+
# By default the batch size is 1 which is not very performant.
|
35
|
+
#
|
36
|
+
def initialize( filename, batch_size = 1 )
|
37
|
+
@db = Amalgalite::Database.new( filename )
|
38
|
+
|
39
|
+
unless @db.schema.tables['readorder_valid'] then
|
40
|
+
logger.info "Creating tables"
|
41
|
+
@db.execute_batch( Results.create_table_sql )
|
42
|
+
end
|
43
|
+
@db.reload_schema!
|
44
|
+
@batch_size = batch_size
|
45
|
+
@valid_queue = []
|
46
|
+
@error_queue = []
|
47
|
+
end
|
48
|
+
|
49
|
+
def close
|
50
|
+
flush
|
51
|
+
@db.close
|
52
|
+
end
|
53
|
+
|
54
|
+
def flush
|
55
|
+
flush_valid
|
56
|
+
flush_error
|
57
|
+
end
|
58
|
+
|
59
|
+
def logger
|
60
|
+
Logging::Logger[ self ]
|
61
|
+
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# :call-seq:
|
65
|
+
# results.has_datum_for_filename?( filename )
|
66
|
+
#
|
67
|
+
# return true or false if the given filename is already in the database
|
68
|
+
#
|
69
|
+
def has_datum_for_filename?( filename )
|
70
|
+
@db.first_value_from( "SELECT filename FROM readorder_valid WHERE filename = ?", filename )
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# :call-seq:
|
75
|
+
# results.add_datum( datum )
|
76
|
+
#
|
77
|
+
# add a datum to the database, this will insert the datum into either valid
|
78
|
+
# or errors depending on the state of datum.valid?
|
79
|
+
#
|
80
|
+
def add_datum( datum )
|
81
|
+
if datum.valid?
|
82
|
+
@valid_queue << datum
|
83
|
+
else
|
84
|
+
@error_queue << datum
|
85
|
+
end
|
86
|
+
flush if ((@valid_queue.size + @error_queue.size) >= @batch_size )
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# :call-seq:
|
91
|
+
# results.flush_valid
|
92
|
+
#
|
93
|
+
# Flush all the pending valid items to the sqlite database
|
94
|
+
#
|
95
|
+
def flush_valid
|
96
|
+
if @valid_queue.size > 0 then
|
97
|
+
logger.info "Flushing #{@valid_queue.size} valid items to disk"
|
98
|
+
sql = <<-insert
|
99
|
+
INSERT INTO readorder_valid ( original_order,
|
100
|
+
size,
|
101
|
+
inode_number,
|
102
|
+
first_physical_block_number,
|
103
|
+
physical_block_count,
|
104
|
+
filename )
|
105
|
+
VALUES( ?, ?, ?, ?, ?, ? );
|
106
|
+
insert
|
107
|
+
@db.transaction do |trans|
|
108
|
+
trans.prepare( sql ) do |stmt|
|
109
|
+
until @valid_queue.empty? do
|
110
|
+
datum = @valid_queue.shift
|
111
|
+
stmt.execute( datum.original_order,
|
112
|
+
datum.size,
|
113
|
+
datum.inode_number,
|
114
|
+
datum.first_physical_block_number,
|
115
|
+
datum.physical_block_count,
|
116
|
+
datum.filename)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# :call-seq:
|
124
|
+
# results.valid_count -> Integer
|
125
|
+
#
|
126
|
+
# return the number of valid result rows
|
127
|
+
#
|
128
|
+
def valid_count
|
129
|
+
@db.first_value_from( "SELECT count(original_order) FROM readorder_valid" )
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
#
|
134
|
+
# :call-seq:
|
135
|
+
# results.each_valid { |v| ... }
|
136
|
+
#
|
137
|
+
# Return each valid record without any predefined order
|
138
|
+
#
|
139
|
+
def each_valid( &block )
|
140
|
+
@db.execute( "SELECT * FROM readorder_valid" ) do |row|
|
141
|
+
yield row
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
#
|
146
|
+
# :call-seq:
|
147
|
+
# results.each_valid_by_first_physical_block_number { |v| ... }
|
148
|
+
#
|
149
|
+
# Return each valid record in physical block number order
|
150
|
+
#
|
151
|
+
def each_valid_by_first_physical_block_number( &block )
|
152
|
+
each_valid_by_field( 'first_physical_block_number' ) do |row|
|
153
|
+
block.call( row )
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
#
|
158
|
+
# :call-seq:
|
159
|
+
# results.each_valid_by_inode_number { |v| ... }
|
160
|
+
#
|
161
|
+
def each_valid_by_inode_number( &block )
|
162
|
+
each_valid_by_field( 'inode_number' ) do |row|
|
163
|
+
block.call( row )
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
#
|
168
|
+
# :call-seq:
|
169
|
+
# results.each_valid_by_field( field ) { |v| ... }
|
170
|
+
#
|
171
|
+
def each_valid_by_field( field, &block )
|
172
|
+
@db.execute( "SELECT * from readorder_valid ORDER BY #{field} ASC" ) do |row|
|
173
|
+
yield row
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# :call-seq:
|
178
|
+
# results.flush_error
|
179
|
+
#
|
180
|
+
# Flush all the error items to disk
|
181
|
+
#
|
182
|
+
def flush_error
|
183
|
+
if @error_queue.size > 0 then
|
184
|
+
logger.info "Flushing #{@error_queue.size} error items to disk"
|
185
|
+
sql = <<-insert
|
186
|
+
INSERT INTO readorder_errors ( original_order, filename, error_reason )
|
187
|
+
VALUES( ?, ?, ? );
|
188
|
+
insert
|
189
|
+
@db.transaction do |trans|
|
190
|
+
trans.prepare( sql ) do |stmt|
|
191
|
+
until @error_queue.empty? do
|
192
|
+
datum = @error_queue.shift
|
193
|
+
stmt.execute( datum.original_order,
|
194
|
+
datum.filename,
|
195
|
+
datum.error_reason )
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
# :call-seq:
|
203
|
+
# results.error_count -> Integer
|
204
|
+
#
|
205
|
+
# return the number of errors
|
206
|
+
#
|
207
|
+
def error_count
|
208
|
+
@db.first_value_from( "SELECT count(original_order) FROM readorder_errors" )
|
209
|
+
end
|
210
|
+
|
211
|
+
#
|
212
|
+
# :call-seq:
|
213
|
+
# results.each_error { |e| ... }
|
214
|
+
#
|
215
|
+
# Return each error record without any predefined order
|
216
|
+
#
|
217
|
+
def each_error( &block )
|
218
|
+
@db.execute( "SELECT * FROM readorder_errors" ) do |row|
|
219
|
+
yield row
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
end
|
224
|
+
end
|
data/lib/readorder/version.rb
CHANGED
data/spec/analyzer_spec.rb
CHANGED
@@ -5,18 +5,27 @@ require 'readorder/analyzer'
|
|
5
5
|
describe Readorder::Analyzer do
|
6
6
|
before( :each ) do
|
7
7
|
s = StringIO.new
|
8
|
-
fl = Dir.glob("#{Readorder::Paths.spec_path}
|
9
|
-
s.
|
8
|
+
fl = Dir.glob("#{Readorder::Paths.spec_path}*_spec.rb")
|
9
|
+
s.write( fl.join("\n") )
|
10
10
|
s.rewind
|
11
|
+
|
11
12
|
@filelist = Readorder::Filelist.new( s )
|
12
|
-
@
|
13
|
+
@r = Readorder::Results.new( ":memory:" )
|
14
|
+
|
15
|
+
@analyzer = Readorder::Analyzer.new( @filelist, @r , false )
|
16
|
+
end
|
17
|
+
|
18
|
+
after( :each ) do
|
19
|
+
@r.close
|
13
20
|
end
|
14
21
|
|
15
22
|
it "collects data about files" do
|
16
23
|
@analyzer.collect_data
|
17
|
-
@analyzer.
|
18
|
-
|
19
|
-
@analyzer.
|
24
|
+
@analyzer.results.valid_count.should > 0
|
25
|
+
check_count = 0
|
26
|
+
@analyzer.results.each_valid { |v| check_count += 1 }
|
27
|
+
check_count.should > 0
|
28
|
+
check_count.should == @analyzer.results.valid_count
|
20
29
|
end
|
21
30
|
|
22
31
|
it "logs a summary report" do
|
@@ -29,12 +38,12 @@ describe Readorder::Analyzer do
|
|
29
38
|
s = StringIO.new
|
30
39
|
s.puts "/a/nonexistent/file"
|
31
40
|
s.rewind
|
32
|
-
analyzer = Readorder::Analyzer.new( Readorder::Filelist.new( s ) )
|
41
|
+
analyzer = Readorder::Analyzer.new( Readorder::Filelist.new( s ), @r, false )
|
33
42
|
analyzer.collect_data
|
34
|
-
analyzer.
|
43
|
+
analyzer.results.error_count.should > 0
|
35
44
|
|
36
45
|
s2 = StringIO.new
|
37
|
-
analyzer.
|
46
|
+
analyzer.dump_errors_to( s2 )
|
38
47
|
s2.rewind
|
39
48
|
s2.gets.should == "error_reason,filename\n"
|
40
49
|
s2.gets.should == "No such file or directory - /a/nonexistent/file,/a/nonexistent/file\n"
|
@@ -43,9 +52,25 @@ describe Readorder::Analyzer do
|
|
43
52
|
it "can dump good data to a csv" do
|
44
53
|
@analyzer.collect_data
|
45
54
|
s = StringIO.new
|
46
|
-
@analyzer.
|
55
|
+
@analyzer.dump_valid_to( s )
|
47
56
|
s.rewind
|
48
57
|
s.gets.should == "filename,size,inode_number\n"
|
49
|
-
s.read.split("\n").size.should == @analyzer.
|
58
|
+
s.read.split("\n").size.should == @analyzer.results.valid_count
|
59
|
+
end
|
60
|
+
|
61
|
+
it "can iterate over inode block numbers" do
|
62
|
+
@analyzer.collect_data
|
63
|
+
by_order = []
|
64
|
+
@analyzer.results.each_valid_by_field( 'original_order' ) do |r|
|
65
|
+
by_order << r['filename']
|
66
|
+
end
|
67
|
+
|
68
|
+
by_inode = []
|
69
|
+
@analyzer.results.each_valid_by_inode_number do |r|
|
70
|
+
by_inode << r['filename']
|
71
|
+
end
|
72
|
+
|
73
|
+
by_order.should_not == by_inode
|
74
|
+
by_order.sort.should == by_inode.sort
|
50
75
|
end
|
51
76
|
end
|
data/tasks/distribution.rake
CHANGED
@@ -18,7 +18,7 @@ if pkg_config = Configuration.for_if_exist?("packaging") then
|
|
18
18
|
|
19
19
|
desc "Install as a gem"
|
20
20
|
task :install => [:clobber, :package] do
|
21
|
-
sh "sudo gem install pkg/#{Readorder::GEM_SPEC.full_name}.gem"
|
21
|
+
sh "sudo gem install pkg/#{Readorder::GEM_SPEC.full_name}.gem --no-rdoc --no-ri --ignore-dependencies --local"
|
22
22
|
end
|
23
23
|
|
24
24
|
desc "Uninstall gem"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: readorder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Hinegardner
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-08-24 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -23,14 +23,14 @@ dependencies:
|
|
23
23
|
version: 0.0.5
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
26
|
+
name: amalgalite
|
27
27
|
type: :runtime
|
28
28
|
version_requirement:
|
29
29
|
version_requirements: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.11.0
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: main
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ~>
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 1.0.
|
63
|
+
version: 1.0.4
|
64
64
|
version:
|
65
65
|
- !ruby/object:Gem::Dependency
|
66
66
|
name: rake
|
@@ -92,6 +92,7 @@ extra_rdoc_files:
|
|
92
92
|
- lib/readorder/filelist.rb
|
93
93
|
- lib/readorder/log.rb
|
94
94
|
- lib/readorder/paths.rb
|
95
|
+
- lib/readorder/results.rb
|
95
96
|
- lib/readorder/runner.rb
|
96
97
|
- lib/readorder/version.rb
|
97
98
|
- lib/readorder.rb
|
@@ -107,6 +108,7 @@ files:
|
|
107
108
|
- lib/readorder/filelist.rb
|
108
109
|
- lib/readorder/log.rb
|
109
110
|
- lib/readorder/paths.rb
|
111
|
+
- lib/readorder/results.rb
|
110
112
|
- lib/readorder/runner.rb
|
111
113
|
- lib/readorder/version.rb
|
112
114
|
- lib/readorder.rb
|