readorder 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +9 -0
- data/gemspec.rb +2 -2
- data/lib/readorder.rb +7 -0
- data/lib/readorder/analyzer.rb +45 -47
- data/lib/readorder/cli.rb +12 -0
- data/lib/readorder/command.rb +31 -8
- data/lib/readorder/commands/sort.rb +10 -7
- data/lib/readorder/datum.rb +32 -1
- data/lib/readorder/results.rb +224 -0
- data/lib/readorder/version.rb +1 -1
- data/spec/analyzer_spec.rb +36 -11
- data/tasks/distribution.rake +1 -1
- metadata +7 -5
data/HISTORY
CHANGED
@@ -1,4 +1,13 @@
|
|
1
1
|
= Changelog
|
2
|
+
|
3
|
+
== Version 2.0.0 - 2009-08-24
|
4
|
+
|
5
|
+
=== Enhancements
|
6
|
+
|
7
|
+
* complete rewrite of internal file sorting and temporary storage enabling
|
8
|
+
the 'readordering' of large lists of files.
|
9
|
+
* switch to amalgalite instead of rbtree for sorting and storage
|
10
|
+
|
2
11
|
== Version 1.0.0
|
3
12
|
|
4
13
|
* Initial public release
|
data/gemspec.rb
CHANGED
@@ -21,10 +21,10 @@ Readorder::GEM_SPEC = Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
# add dependencies here
|
23
23
|
spec.add_dependency("configuration", "~> 0.0.5")
|
24
|
-
spec.add_dependency("
|
24
|
+
spec.add_dependency("amalgalite", "~> 0.11.0")
|
25
25
|
spec.add_dependency("main", "~> 2.8.3")
|
26
26
|
spec.add_dependency("logging", "~> 1.1.4")
|
27
|
-
spec.add_dependency("hitimes", "~> 1.0.
|
27
|
+
spec.add_dependency("hitimes", "~> 1.0.4")
|
28
28
|
|
29
29
|
spec.add_development_dependency( "rake", "~> 0.8.3")
|
30
30
|
|
data/lib/readorder.rb
CHANGED
@@ -16,6 +16,13 @@ end
|
|
16
16
|
|
17
17
|
require 'rubygems'
|
18
18
|
require 'logging'
|
19
|
+
|
20
|
+
# require amalgalite explicitly before hitimes explicitly because of
|
21
|
+
# using flat namespace on OSX
|
22
|
+
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/344658
|
23
|
+
require 'amalgalite'
|
24
|
+
require 'hitimes'
|
25
|
+
|
19
26
|
require 'readorder/version'
|
20
27
|
require 'readorder/paths'
|
21
28
|
require 'readorder/cli'
|
data/lib/readorder/analyzer.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
|
-
require 'hitimes'
|
2
1
|
require 'readorder/datum'
|
3
|
-
require '
|
2
|
+
require 'readorder/results'
|
4
3
|
|
5
4
|
module Readorder
|
6
5
|
#
|
@@ -8,34 +7,27 @@ module Readorder
|
|
8
7
|
# appropriate Datum instances
|
9
8
|
#
|
10
9
|
class Analyzer
|
11
|
-
#
|
12
|
-
attr_accessor :
|
10
|
+
# number of bad_data items encountered
|
11
|
+
attr_accessor :bad_data_count
|
12
|
+
|
13
|
+
# number of good_data items encountered
|
14
|
+
attr_accessor :good_data_count
|
13
15
|
|
14
|
-
#
|
15
|
-
attr_accessor :
|
16
|
-
|
17
|
-
# an RBTree of Datum instances of those files that were analyzed
|
18
|
-
# in order by physical disc block number. This only has items if
|
19
|
-
# the physical block number was obtained. It is empty otherwise
|
20
|
-
attr_accessor :physical_order
|
21
|
-
|
22
|
-
# an RBTree of Datum instances of those files that were analyzed
|
23
|
-
# in order by inode
|
24
|
-
attr_accessor :inode_order
|
16
|
+
# The Results handler
|
17
|
+
attr_accessor :results
|
25
18
|
|
26
19
|
#
|
27
20
|
# Initialize the Analyzer with the Filelist object and whether or
|
28
21
|
# not to gather the physical block size.
|
29
22
|
#
|
30
|
-
def initialize( filelist, get_physical = true )
|
23
|
+
def initialize( filelist, results, get_physical = true )
|
31
24
|
@filelist = filelist
|
32
|
-
@bad_data = []
|
33
|
-
@good_data = []
|
34
|
-
@physical_order = ::MultiRBTree.new
|
35
|
-
@inode_order = ::MultiRBTree.new
|
36
25
|
@get_physical = get_physical
|
37
26
|
@size_metric = ::Hitimes::ValueMetric.new( 'size' )
|
38
27
|
@time_metric = ::Hitimes::TimedMetric.new( 'time' )
|
28
|
+
@results = results
|
29
|
+
@bad_data_count = 0
|
30
|
+
@good_data_count = 0
|
39
31
|
end
|
40
32
|
|
41
33
|
#
|
@@ -60,30 +52,35 @@ module Readorder
|
|
60
52
|
logger.info "Begin data collection"
|
61
53
|
original_order = 0
|
62
54
|
@filelist.each_line do |fname|
|
63
|
-
|
55
|
+
next if @results.has_datum_for_filename?( fname )
|
56
|
+
logger.debug " analyzing #{fname.strip}"
|
64
57
|
@time_metric.measure do
|
65
58
|
d = Datum.new( fname )
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@
|
71
|
-
|
72
|
-
if
|
73
|
-
@
|
59
|
+
begin
|
60
|
+
d.collect( @get_physical )
|
61
|
+
d.original_order = original_order
|
62
|
+
|
63
|
+
@results.add_datum( d )
|
64
|
+
|
65
|
+
if d.valid? then
|
66
|
+
@size_metric.measure d.stat.size
|
67
|
+
@good_data_count += 1
|
68
|
+
else
|
69
|
+
@bad_data_count += 1
|
74
70
|
end
|
75
|
-
|
76
|
-
|
71
|
+
rescue => e
|
72
|
+
logger.error "#{e} : #{d.to_hash.inspect}"
|
77
73
|
end
|
78
74
|
end
|
79
75
|
|
80
76
|
if @time_metric.count % 10_000 == 0 then
|
81
|
-
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec"
|
77
|
+
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec ( #{@good_data_count} good, #{@bad_data_count} bad )"
|
82
78
|
end
|
83
79
|
original_order += 1
|
84
80
|
end
|
81
|
+
@results.flush
|
85
82
|
logger.info " processed #{@time_metric.count} at #{"%0.3f" % @time_metric.rate} files/sec"
|
86
|
-
logger.info " yielded #{@
|
83
|
+
logger.info " yielded #{@good_data_count} data points"
|
87
84
|
logger.info "End data collection"
|
88
85
|
nil
|
89
86
|
end
|
@@ -112,30 +109,30 @@ module Readorder
|
|
112
109
|
s.puts "Files analyzed : #{"%12d" % @time_metric.count}"
|
113
110
|
s.puts "Elapsed time : #{"%12d" % @time_metric.duration} seconds"
|
114
111
|
s.puts "Collection Rate : #{"%16.3f" % @time_metric.rate} files/sec"
|
115
|
-
s.puts "Good files : #{"%12d" % @
|
112
|
+
s.puts "Good files : #{"%12d" % @good_data_count}"
|
116
113
|
s.puts " average size : #{"%16.3f" % @size_metric.mean} bytes"
|
117
114
|
s.puts " minimum size : #{"%16.3f" % @size_metric.min} bytes"
|
118
115
|
s.puts " maximum size : #{"%16.3f" % @size_metric.max} bytes"
|
119
116
|
s.puts " sum of sizes : #{"%12d" % @size_metric.sum} bytes"
|
120
|
-
s.puts "Bad files : #{"%12d" % @
|
117
|
+
s.puts "Bad files : #{"%12d" % @bad_data_count}"
|
121
118
|
return s.string
|
122
119
|
end
|
123
120
|
|
124
121
|
#
|
125
122
|
# call-seq:
|
126
|
-
# analyzer.
|
123
|
+
# analyzer.dump_errors_to( IO ) -> nil
|
127
124
|
#
|
128
125
|
# write a csv to the _IO_ object passed in. The format is:
|
129
126
|
#
|
130
|
-
#
|
127
|
+
# error_reason,filename
|
131
128
|
#
|
132
129
|
# If there are no bad Datum instances then do not write anything.
|
133
130
|
#
|
134
|
-
def
|
135
|
-
if
|
131
|
+
def dump_errors_to( io )
|
132
|
+
if results.error_count > 0 then
|
136
133
|
io.puts "error_reason,filename"
|
137
|
-
|
138
|
-
io.puts "#{d
|
134
|
+
results.each_error do |d|
|
135
|
+
io.puts "#{d['error_reason']},#{d['filename']}"
|
139
136
|
end
|
140
137
|
end
|
141
138
|
nil
|
@@ -144,7 +141,7 @@ module Readorder
|
|
144
141
|
|
145
142
|
#
|
146
143
|
# call-seq:
|
147
|
-
# analyzer.
|
144
|
+
# analyzer.dump_valid_to( IO ) -> nil
|
148
145
|
#
|
149
146
|
# Write a csv to the _IO_ object passed in. The format is:
|
150
147
|
#
|
@@ -153,17 +150,18 @@ module Readorder
|
|
153
150
|
# The last two fields *physical_block_count* and *first_physical_block_number* are
|
154
151
|
# only written if the analyzer was able to gather physical block information
|
155
152
|
#
|
156
|
-
def
|
153
|
+
def dump_valid_to( io )
|
157
154
|
fields = %w[ filename size inode_number ]
|
155
|
+
by_field = 'inode_number'
|
158
156
|
if @get_physical then
|
159
157
|
fields << 'physical_block_count'
|
160
158
|
fields << 'first_physical_block_number'
|
159
|
+
by_field = 'first_physical_block_number'
|
161
160
|
end
|
162
|
-
|
163
161
|
io.puts fields.join(",")
|
164
|
-
|
165
|
-
|
166
|
-
|
162
|
+
results.each_valid_by_field( by_field ) do |d|
|
163
|
+
f = fields.collect { |f| d[f] }
|
164
|
+
io.puts f.join(",")
|
167
165
|
end
|
168
166
|
end
|
169
167
|
end
|
data/lib/readorder/cli.rb
CHANGED
@@ -36,6 +36,7 @@ module Readorder
|
|
36
36
|
mixin :argument_filelist
|
37
37
|
mixin :option_output
|
38
38
|
mixin :option_error_filelist
|
39
|
+
mixin :option_batch_size
|
39
40
|
|
40
41
|
run { Cli.run_command_with_params( 'sort', params ) }
|
41
42
|
}
|
@@ -51,6 +52,7 @@ module Readorder
|
|
51
52
|
mixin :argument_filelist
|
52
53
|
mixin :option_output
|
53
54
|
mixin :option_error_filelist
|
55
|
+
mixin :option_batch_size
|
54
56
|
|
55
57
|
option( 'data-csv' ) {
|
56
58
|
description "Write the raw data collected to this csv file"
|
@@ -91,6 +93,7 @@ module Readorder
|
|
91
93
|
mixin :option_output
|
92
94
|
mixin :argument_filelist
|
93
95
|
mixin :option_error_filelist
|
96
|
+
mixin :option_batch_size
|
94
97
|
|
95
98
|
run { Cli.run_command_with_params( 'test', params ) }
|
96
99
|
}
|
@@ -137,6 +140,15 @@ module Readorder
|
|
137
140
|
validate { |f| File.directory?( File.dirname(File.expand_path( f ) ) ) }
|
138
141
|
end
|
139
142
|
end
|
143
|
+
|
144
|
+
mixin :option_batch_size do
|
145
|
+
option('batch-size' ) do
|
146
|
+
description "The number of files to queue before writing them to the db for storage"
|
147
|
+
argument :required
|
148
|
+
default 10_000
|
149
|
+
cast :integer
|
150
|
+
end
|
151
|
+
end
|
140
152
|
}
|
141
153
|
|
142
154
|
|
data/lib/readorder/command.rb
CHANGED
@@ -32,6 +32,7 @@ module Readorder
|
|
32
32
|
@filelist = nil
|
33
33
|
@analyzer = nil
|
34
34
|
@output = nil
|
35
|
+
@delete_results = true
|
35
36
|
end
|
36
37
|
|
37
38
|
def filelist
|
@@ -47,7 +48,21 @@ module Readorder
|
|
47
48
|
end
|
48
49
|
|
49
50
|
def analyzer
|
50
|
-
@analyzer ||= Analyzer.new( filelist, self.get_physical? )
|
51
|
+
@analyzer ||= Analyzer.new( filelist, self.results, self.get_physical? )
|
52
|
+
end
|
53
|
+
|
54
|
+
def results_dbfile
|
55
|
+
if options['output'] then
|
56
|
+
output_dname = File.dirname( options['output'] )
|
57
|
+
output_bname = File.basename( options['output'], '.*' )
|
58
|
+
return File.join( output_dname, "#{output_bname}.db" )
|
59
|
+
else
|
60
|
+
return ":memory:"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def results
|
65
|
+
@results ||= Results.new( results_dbfile, options['batch-size'] )
|
51
66
|
end
|
52
67
|
|
53
68
|
def output
|
@@ -94,24 +109,32 @@ module Readorder
|
|
94
109
|
end
|
95
110
|
|
96
111
|
# called by the Runner if an error is encountered during the run method
|
97
|
-
def error()
|
112
|
+
def error()
|
113
|
+
results.close
|
114
|
+
end
|
98
115
|
|
99
116
|
# called by runner if a signal is hit
|
100
|
-
def shutdown()
|
117
|
+
def shutdown()
|
118
|
+
results.close
|
119
|
+
@delete_results = false
|
120
|
+
end
|
101
121
|
|
102
122
|
# called by runner when all is done
|
103
123
|
def after()
|
104
|
-
if output != $stdout then
|
105
|
-
output.close
|
106
|
-
end
|
107
124
|
if options['error-filelist'] then
|
108
|
-
if analyzer.
|
125
|
+
if analyzer.bad_data_count > 0 then
|
109
126
|
File.open( options['error-filelist'], "w+" ) do |f|
|
110
|
-
analyzer.
|
127
|
+
analyzer.dump_errors_to( f )
|
111
128
|
end
|
112
129
|
logger.info "wrote error filelist to #{options['error-filelist']}"
|
113
130
|
end
|
114
131
|
end
|
132
|
+
|
133
|
+
if output != $stdout then
|
134
|
+
output.close
|
135
|
+
results.close
|
136
|
+
File.unlink( results_dbfile ) if @delete_results
|
137
|
+
end
|
115
138
|
end
|
116
139
|
|
117
140
|
class << self
|
@@ -7,18 +7,21 @@ module Readorder
|
|
7
7
|
#
|
8
8
|
class Sort < ::Readorder::Command
|
9
9
|
def run
|
10
|
+
|
10
11
|
analyzer.collect_data
|
11
12
|
analyzer.log_summary_report
|
12
|
-
|
13
|
+
|
14
|
+
field = nil
|
13
15
|
if get_physical? then
|
14
|
-
logger.info "using physical order"
|
15
|
-
|
16
|
+
logger.info "using first physical block number order"
|
17
|
+
field = 'first_physical_block_number'
|
16
18
|
else
|
17
|
-
logger.info "using inode order"
|
18
|
-
|
19
|
+
logger.info "using inode number order"
|
20
|
+
field = 'inode_number'
|
19
21
|
end
|
20
|
-
|
21
|
-
|
22
|
+
|
23
|
+
analyzer.results.each_valid_by_field( field ) do |row|
|
24
|
+
output.puts row['filename']
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
data/lib/readorder/datum.rb
CHANGED
@@ -36,6 +36,10 @@ module Readorder
|
|
36
36
|
@is_linux ||= ::Config::CONFIG['host_os'] =~ /linux/i
|
37
37
|
end
|
38
38
|
|
39
|
+
def self.hash_keys
|
40
|
+
%w[ filename inode_number first_physical_block_number original_order size ]
|
41
|
+
end
|
42
|
+
|
39
43
|
#
|
40
44
|
# call-seq:
|
41
45
|
# Datum.new( filename ) -> Datum
|
@@ -49,12 +53,39 @@ module Readorder
|
|
49
53
|
@physical_block_count = 0
|
50
54
|
@error_reason = nil
|
51
55
|
@original_order = 0
|
56
|
+
@size = 0
|
52
57
|
|
53
58
|
@stat = nil
|
54
59
|
@valid = false
|
55
60
|
@collected = false
|
56
61
|
end
|
57
62
|
|
63
|
+
#
|
64
|
+
# call-seq:
|
65
|
+
# datum.to_csv
|
66
|
+
#
|
67
|
+
# return the datum as a CSV in the format:
|
68
|
+
#
|
69
|
+
# physical_id,inode_id,filename
|
70
|
+
#
|
71
|
+
def to_csv
|
72
|
+
"#{first_physical_block_number},#{inode_number},#{filename}"
|
73
|
+
end
|
74
|
+
|
75
|
+
#
|
76
|
+
# :call-seq:
|
77
|
+
# datum.to_hash -> Hash
|
78
|
+
#
|
79
|
+
# return all the items in the datum as a hash
|
80
|
+
#
|
81
|
+
def to_hash
|
82
|
+
h = {}
|
83
|
+
Datum.hash_keys.each do |k|
|
84
|
+
h[k] = self.send( k )
|
85
|
+
end
|
86
|
+
return h
|
87
|
+
end
|
88
|
+
|
58
89
|
#
|
59
90
|
# call-seq:
|
60
91
|
# datum.size -> Integer
|
@@ -62,7 +93,7 @@ module Readorder
|
|
62
93
|
# The number of bytes the file consumes
|
63
94
|
#
|
64
95
|
def size
|
65
|
-
@stat.size
|
96
|
+
@size ||= @stat.size
|
66
97
|
end
|
67
98
|
|
68
99
|
#
|
@@ -0,0 +1,224 @@
|
|
1
|
+
require 'amalgalite'
|
2
|
+
|
3
|
+
module Readorder
|
4
|
+
# Results persists the results from a readorder run
|
5
|
+
# The results are persisted in an SQLite3 database which allows for ordering
|
6
|
+
# the results by whatever means are wanted.
|
7
|
+
class Results
|
8
|
+
def self.create_table_sql
|
9
|
+
sql = <<-SQL
|
10
|
+
CREATE TABLE readorder_valid (
|
11
|
+
original_order INTEGER PRIMARY KEY NOT NULL,
|
12
|
+
size INTEGER NOT NULL,
|
13
|
+
inode_number INTEGER NOT NULL UNIQUE,
|
14
|
+
first_physical_block_number INTEGER UNIQUE,
|
15
|
+
physical_block_count INTEGER,
|
16
|
+
filename TEXT NOT NULL UNIQUE
|
17
|
+
);
|
18
|
+
|
19
|
+
CREATE TABLE readorder_errors (
|
20
|
+
original_order INTEGER PRIMARY KEY NOT NULL,
|
21
|
+
filename TEXT NOT NULL UNIQUE,
|
22
|
+
error_reason TEXT NOT NULL
|
23
|
+
);
|
24
|
+
SQL
|
25
|
+
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# :call-seq:
|
29
|
+
# Results.new( filename, 10_000 ) -> results
|
30
|
+
#
|
31
|
+
# Create a new Results object with a batch size. The batch size is how many
|
32
|
+
# items to queue up to run in a single transaction into the sqlite database.
|
33
|
+
#
|
34
|
+
# By default the batch size is 1 which is not very performant.
|
35
|
+
#
|
36
|
+
def initialize( filename, batch_size = 1 )
|
37
|
+
@db = Amalgalite::Database.new( filename )
|
38
|
+
|
39
|
+
unless @db.schema.tables['readorder_valid'] then
|
40
|
+
logger.info "Creating tables"
|
41
|
+
@db.execute_batch( Results.create_table_sql )
|
42
|
+
end
|
43
|
+
@db.reload_schema!
|
44
|
+
@batch_size = batch_size
|
45
|
+
@valid_queue = []
|
46
|
+
@error_queue = []
|
47
|
+
end
|
48
|
+
|
49
|
+
def close
|
50
|
+
flush
|
51
|
+
@db.close
|
52
|
+
end
|
53
|
+
|
54
|
+
def flush
|
55
|
+
flush_valid
|
56
|
+
flush_error
|
57
|
+
end
|
58
|
+
|
59
|
+
def logger
|
60
|
+
Logging::Logger[ self ]
|
61
|
+
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# :call-seq:
|
65
|
+
# results.has_datum_for_filename?( filename )
|
66
|
+
#
|
67
|
+
# return true or false if the given filename is already in the database
|
68
|
+
#
|
69
|
+
def has_datum_for_filename?( filename )
|
70
|
+
@db.first_value_from( "SELECT filename FROM readorder_valid WHERE filename = ?", filename )
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# :call-seq:
|
75
|
+
# results.add_datum( datum )
|
76
|
+
#
|
77
|
+
# add a datum to the database, this will insert the datum into either valid
|
78
|
+
# or errors depending on the state of datum.valid?
|
79
|
+
#
|
80
|
+
def add_datum( datum )
|
81
|
+
if datum.valid?
|
82
|
+
@valid_queue << datum
|
83
|
+
else
|
84
|
+
@error_queue << datum
|
85
|
+
end
|
86
|
+
flush if ((@valid_queue.size + @error_queue.size) >= @batch_size )
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# :call-seq:
|
91
|
+
# results.flush_valid
|
92
|
+
#
|
93
|
+
# Flush all the pending valid items to the sqlite database
|
94
|
+
#
|
95
|
+
def flush_valid
|
96
|
+
if @valid_queue.size > 0 then
|
97
|
+
logger.info "Flushing #{@valid_queue.size} valid items to disk"
|
98
|
+
sql = <<-insert
|
99
|
+
INSERT INTO readorder_valid ( original_order,
|
100
|
+
size,
|
101
|
+
inode_number,
|
102
|
+
first_physical_block_number,
|
103
|
+
physical_block_count,
|
104
|
+
filename )
|
105
|
+
VALUES( ?, ?, ?, ?, ?, ? );
|
106
|
+
insert
|
107
|
+
@db.transaction do |trans|
|
108
|
+
trans.prepare( sql ) do |stmt|
|
109
|
+
until @valid_queue.empty? do
|
110
|
+
datum = @valid_queue.shift
|
111
|
+
stmt.execute( datum.original_order,
|
112
|
+
datum.size,
|
113
|
+
datum.inode_number,
|
114
|
+
datum.first_physical_block_number,
|
115
|
+
datum.physical_block_count,
|
116
|
+
datum.filename)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# :call-seq:
|
124
|
+
# results.valid_count -> Integer
|
125
|
+
#
|
126
|
+
# return the number of valid result rows
|
127
|
+
#
|
128
|
+
def valid_count
|
129
|
+
@db.first_value_from( "SELECT count(original_order) FROM readorder_valid" )
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
#
|
134
|
+
# :call-seq:
|
135
|
+
# results.each_valid { |v| ... }
|
136
|
+
#
|
137
|
+
# Return each valid record without any predefined order
|
138
|
+
#
|
139
|
+
def each_valid( &block )
|
140
|
+
@db.execute( "SELECT * FROM readorder_valid" ) do |row|
|
141
|
+
yield row
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
#
|
146
|
+
# :call-seq:
|
147
|
+
# results.each_valid_by_first_physical_block_number { |v| ... }
|
148
|
+
#
|
149
|
+
# Return each valid record in physical block number order
|
150
|
+
#
|
151
|
+
def each_valid_by_first_physical_block_number( &block )
|
152
|
+
each_valid_by_field( 'first_physical_block_number' ) do |row|
|
153
|
+
block.call( row )
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
#
|
158
|
+
# :call-seq:
|
159
|
+
# results.each_valid_by_inode_number { |v| ... }
|
160
|
+
#
|
161
|
+
def each_valid_by_inode_number( &block )
|
162
|
+
each_valid_by_field( 'inode_number' ) do |row|
|
163
|
+
block.call( row )
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
#
|
168
|
+
# :call-seq:
|
169
|
+
# results.each_valid_by_field( field ) { |v| ... }
|
170
|
+
#
|
171
|
+
def each_valid_by_field( field, &block )
|
172
|
+
@db.execute( "SELECT * from readorder_valid ORDER BY #{field} ASC" ) do |row|
|
173
|
+
yield row
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# :call-seq:
|
178
|
+
# results.flush_error
|
179
|
+
#
|
180
|
+
# Flush all the error items to disk
|
181
|
+
#
|
182
|
+
def flush_error
|
183
|
+
if @error_queue.size > 0 then
|
184
|
+
logger.info "Flushing #{@error_queue.size} error items to disk"
|
185
|
+
sql = <<-insert
|
186
|
+
INSERT INTO readorder_errors ( original_order, filename, error_reason )
|
187
|
+
VALUES( ?, ?, ? );
|
188
|
+
insert
|
189
|
+
@db.transaction do |trans|
|
190
|
+
trans.prepare( sql ) do |stmt|
|
191
|
+
until @error_queue.empty? do
|
192
|
+
datum = @error_queue.shift
|
193
|
+
stmt.execute( datum.original_order,
|
194
|
+
datum.filename,
|
195
|
+
datum.error_reason )
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
# :call-seq:
|
203
|
+
# results.error_count -> Integer
|
204
|
+
#
|
205
|
+
# return the number of errors
|
206
|
+
#
|
207
|
+
def error_count
|
208
|
+
@db.first_value_from( "SELECT count(original_order) FROM readorder_errors" )
|
209
|
+
end
|
210
|
+
|
211
|
+
#
|
212
|
+
# :call-seq:
|
213
|
+
# results.each_error { |e| ... }
|
214
|
+
#
|
215
|
+
# Return each error record without any predefined order
|
216
|
+
#
|
217
|
+
def each_error( &block )
|
218
|
+
@db.execute( "SELECT * FROM readorder_errors" ) do |row|
|
219
|
+
yield row
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
end
|
224
|
+
end
|
data/lib/readorder/version.rb
CHANGED
data/spec/analyzer_spec.rb
CHANGED
@@ -5,18 +5,27 @@ require 'readorder/analyzer'
|
|
5
5
|
describe Readorder::Analyzer do
|
6
6
|
before( :each ) do
|
7
7
|
s = StringIO.new
|
8
|
-
fl = Dir.glob("#{Readorder::Paths.spec_path}
|
9
|
-
s.
|
8
|
+
fl = Dir.glob("#{Readorder::Paths.spec_path}*_spec.rb")
|
9
|
+
s.write( fl.join("\n") )
|
10
10
|
s.rewind
|
11
|
+
|
11
12
|
@filelist = Readorder::Filelist.new( s )
|
12
|
-
@
|
13
|
+
@r = Readorder::Results.new( ":memory:" )
|
14
|
+
|
15
|
+
@analyzer = Readorder::Analyzer.new( @filelist, @r , false )
|
16
|
+
end
|
17
|
+
|
18
|
+
after( :each ) do
|
19
|
+
@r.close
|
13
20
|
end
|
14
21
|
|
15
22
|
it "collects data about files" do
|
16
23
|
@analyzer.collect_data
|
17
|
-
@analyzer.
|
18
|
-
|
19
|
-
@analyzer.
|
24
|
+
@analyzer.results.valid_count.should > 0
|
25
|
+
check_count = 0
|
26
|
+
@analyzer.results.each_valid { |v| check_count += 1 }
|
27
|
+
check_count.should > 0
|
28
|
+
check_count.should == @analyzer.results.valid_count
|
20
29
|
end
|
21
30
|
|
22
31
|
it "logs a summary report" do
|
@@ -29,12 +38,12 @@ describe Readorder::Analyzer do
|
|
29
38
|
s = StringIO.new
|
30
39
|
s.puts "/a/nonexistent/file"
|
31
40
|
s.rewind
|
32
|
-
analyzer = Readorder::Analyzer.new( Readorder::Filelist.new( s ) )
|
41
|
+
analyzer = Readorder::Analyzer.new( Readorder::Filelist.new( s ), @r, false )
|
33
42
|
analyzer.collect_data
|
34
|
-
analyzer.
|
43
|
+
analyzer.results.error_count.should > 0
|
35
44
|
|
36
45
|
s2 = StringIO.new
|
37
|
-
analyzer.
|
46
|
+
analyzer.dump_errors_to( s2 )
|
38
47
|
s2.rewind
|
39
48
|
s2.gets.should == "error_reason,filename\n"
|
40
49
|
s2.gets.should == "No such file or directory - /a/nonexistent/file,/a/nonexistent/file\n"
|
@@ -43,9 +52,25 @@ describe Readorder::Analyzer do
|
|
43
52
|
it "can dump good data to a csv" do
|
44
53
|
@analyzer.collect_data
|
45
54
|
s = StringIO.new
|
46
|
-
@analyzer.
|
55
|
+
@analyzer.dump_valid_to( s )
|
47
56
|
s.rewind
|
48
57
|
s.gets.should == "filename,size,inode_number\n"
|
49
|
-
s.read.split("\n").size.should == @analyzer.
|
58
|
+
s.read.split("\n").size.should == @analyzer.results.valid_count
|
59
|
+
end
|
60
|
+
|
61
|
+
it "can iterate over inode block numbers" do
|
62
|
+
@analyzer.collect_data
|
63
|
+
by_order = []
|
64
|
+
@analyzer.results.each_valid_by_field( 'original_order' ) do |r|
|
65
|
+
by_order << r['filename']
|
66
|
+
end
|
67
|
+
|
68
|
+
by_inode = []
|
69
|
+
@analyzer.results.each_valid_by_inode_number do |r|
|
70
|
+
by_inode << r['filename']
|
71
|
+
end
|
72
|
+
|
73
|
+
by_order.should_not == by_inode
|
74
|
+
by_order.sort.should == by_inode.sort
|
50
75
|
end
|
51
76
|
end
|
data/tasks/distribution.rake
CHANGED
@@ -18,7 +18,7 @@ if pkg_config = Configuration.for_if_exist?("packaging") then
|
|
18
18
|
|
19
19
|
desc "Install as a gem"
|
20
20
|
task :install => [:clobber, :package] do
|
21
|
-
sh "sudo gem install pkg/#{Readorder::GEM_SPEC.full_name}.gem"
|
21
|
+
sh "sudo gem install pkg/#{Readorder::GEM_SPEC.full_name}.gem --no-rdoc --no-ri --ignore-dependencies --local"
|
22
22
|
end
|
23
23
|
|
24
24
|
desc "Uninstall gem"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: readorder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Hinegardner
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-08-24 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -23,14 +23,14 @@ dependencies:
|
|
23
23
|
version: 0.0.5
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
26
|
+
name: amalgalite
|
27
27
|
type: :runtime
|
28
28
|
version_requirement:
|
29
29
|
version_requirements: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.11.0
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: main
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ~>
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 1.0.
|
63
|
+
version: 1.0.4
|
64
64
|
version:
|
65
65
|
- !ruby/object:Gem::Dependency
|
66
66
|
name: rake
|
@@ -92,6 +92,7 @@ extra_rdoc_files:
|
|
92
92
|
- lib/readorder/filelist.rb
|
93
93
|
- lib/readorder/log.rb
|
94
94
|
- lib/readorder/paths.rb
|
95
|
+
- lib/readorder/results.rb
|
95
96
|
- lib/readorder/runner.rb
|
96
97
|
- lib/readorder/version.rb
|
97
98
|
- lib/readorder.rb
|
@@ -107,6 +108,7 @@ files:
|
|
107
108
|
- lib/readorder/filelist.rb
|
108
109
|
- lib/readorder/log.rb
|
109
110
|
- lib/readorder/paths.rb
|
111
|
+
- lib/readorder/results.rb
|
110
112
|
- lib/readorder/runner.rb
|
111
113
|
- lib/readorder/version.rb
|
112
114
|
- lib/readorder.rb
|