nearline 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,7 +7,9 @@ module Nearline
7
7
  require 'fileutils'
8
8
 
9
9
  belongs_to :file_content
10
+ belongs_to :system
10
11
  has_and_belongs_to_many :manifests
12
+
11
13
 
12
14
  def self.create_for(file_path, manifest)
13
15
  file_information = FileInformation.new(file_path, manifest)
@@ -16,8 +18,14 @@ module Nearline
16
18
  return nil if file_information.path_hash.nil?
17
19
 
18
20
  # If we find an existing entry, use it
19
- hit = self.find_by_path_hash(file_information.path_hash)
20
- return hit unless hit.nil?
21
+ hash = manifest.system.archived_file_lookup_hash
22
+ hit = hash[file_information.path_hash]
23
+
24
+ unless hit.nil?
25
+ af = ArchivedFile.find(hit)
26
+ manifest.archived_files << af
27
+ return af
28
+ end
21
29
 
22
30
  # We need to create a record for either a directory or file
23
31
  archived_file = ArchivedFile.new(
@@ -27,14 +35,16 @@ module Nearline
27
35
  # Find a new directory
28
36
  if (file_information.is_directory)
29
37
  archived_file.save!
38
+ manifest.archived_files << archived_file
30
39
  return archived_file
31
40
  end
32
41
 
33
42
  # Find a new file that needs to be persisted
34
43
  archived_file.file_content.file_size =
35
44
  [file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
36
- archived_file.persist(manifest)
37
- archived_file.save!
45
+ archived_file = archived_file.persist(manifest)
46
+ archived_file.save! unless archived_file.nil?
47
+ manifest.archived_files << archived_file
38
48
  archived_file
39
49
 
40
50
  # TODO: Symbolic links, block devices, ...?
@@ -63,7 +73,7 @@ module Nearline
63
73
 
64
74
  def generate_path_hash
65
75
  return nil if @stat.nil?
66
- target = [@manifest.system_name,
76
+ target = [@manifest.system.name,
67
77
  @file_path,
68
78
  @stat.uid,
69
79
  @stat.gid,
@@ -73,14 +83,14 @@ module Nearline
73
83
  end
74
84
 
75
85
  def file_content_entry_for_files_only
76
- return FileContent.fresh_entry unless @is_directory
86
+ return FileContent.new unless @is_directory
77
87
  return nil
78
88
  end
79
89
 
80
90
  def build_parameters
81
91
  return nil if @stat.nil?
82
92
  {
83
- :system_name => @manifest.system_name,
93
+ :system => @manifest.system,
84
94
  :path => @file_path,
85
95
  :path_hash => @path_hash,
86
96
  :file_content => file_content_entry_for_files_only,
@@ -190,19 +200,21 @@ module Nearline
190
200
  manifest.add_log "recorded file length #{file_size} " +
191
201
  "does not match #{self.file_content.file_size} " +
192
202
  "reported by the file system on path: #{self.path}"
203
+ self.file_content.file_size = file_size
193
204
  end
194
205
  end
195
206
 
196
207
  def verify_content(manifest)
197
208
  unless (self.file_content.verified?)
198
- manifest.add_log "failed verification on path: #{self.path}"
209
+ manifest.add_log "file dropped on failed verification on path: #{self.path}"
210
+ self.file_content.orphan_check
211
+ self.destroy
199
212
  end
200
213
  end
201
214
 
202
215
  def unique_sequence_processed?(key,manifest)
203
216
  if self.file_content.unique_fingerprint?(key)
204
217
  self.file_content.fingerprint = key
205
- self.file_content.save!
206
218
  self.save!
207
219
  verify_content(manifest)
208
220
  return true
@@ -211,7 +223,6 @@ module Nearline
211
223
  end
212
224
 
213
225
  def clean_up_duplicate_content
214
- Sequence.delete_all("file_content_id=#{self.file_content.id}")
215
226
  self.file_content.orphan_check
216
227
  end
217
228
 
@@ -4,9 +4,9 @@ module Nearline
4
4
  module Models
5
5
 
6
6
  # Represents a unit of file content which may be
7
- # freely shared across the repository
7
+ # freely shared across the repository.
8
8
  # Its sole responsibility is to preserve and provide
9
- # content access
9
+ # content access.
10
10
  class Block < ActiveRecord::Base
11
11
  require "zlib"
12
12
 
@@ -37,13 +37,22 @@ module Nearline
37
37
  end
38
38
  @content = self.bulk_content
39
39
  end
40
-
41
- def self.for_content(x, old_block = nil)
42
- unless old_block.nil?
43
- if x == old_block.content
44
- return old_block
45
- end
40
+
41
+ def self.id_for_content(x)
42
+ block = Block.new(:bulk_content => x)
43
+ block.calculate_fingerprint
44
+ hit = Block.connection.select_one(
45
+ "select id from blocks where fingerprint='#{block.fingerprint}'"
46
+ )
47
+ unless hit.nil?
48
+ return hit['id']
46
49
  end
50
+ block.attempt_compression
51
+ block.save!
52
+ block.id
53
+ end
54
+
55
+ def self.for_content(x)
47
56
  block = Models::Block.new(:bulk_content => x)
48
57
  block.calculate_fingerprint
49
58
  found = find_by_fingerprint(block.fingerprint)
@@ -1,23 +1,16 @@
1
1
  module Nearline
2
2
  module Models
3
3
 
4
- # Has the responsibility of identifying and
4
+ # Has the responsibility of identifying, restoring and
5
5
  # verifying content
6
6
  class FileContent < ActiveRecord::Base
7
7
  has_many :sequences
8
8
  has_many :archived_files
9
-
10
- def self.fresh_entry
11
- file_content = FileContent.new
12
- file_content.save!
13
- file_content
14
- end
15
9
 
16
10
  def orphan_check
17
- if (self.archived_files.size == 1)
11
+ if (self.archived_files.size < 2)
18
12
  sequences.each do |s|
19
13
  s.destroy
20
- s.block.orphan_check
21
14
  end
22
15
  self.destroy
23
16
  end
@@ -62,21 +55,26 @@ module Nearline
62
55
  class Sequence < ActiveRecord::Base
63
56
  belongs_to :block
64
57
  belongs_to :file_content
58
+
59
+ def after_destroy
60
+ block.orphan_check
61
+ end
65
62
  end
66
63
 
67
64
  class FileSequencer
68
65
  def initialize(file_content)
69
66
  @inc = 0
70
67
  @file_content = file_content
68
+ @file_content.save!
71
69
  end
72
70
 
73
71
  def preserve_content(content)
74
72
  @inc += 1
75
- block = Block.for_content(content)
73
+ block_id = Block.id_for_content(content)
76
74
  sequence = Sequence.new(
77
75
  :sequence => @inc,
78
76
  :file_content_id => @file_content.id,
79
- :block_id => block.id
77
+ :block_id => block_id
80
78
  )
81
79
  sequence.save!
82
80
  sequence
@@ -5,10 +5,14 @@ module Nearline
5
5
  class FileFinder
6
6
  require 'find'
7
7
  def self.recurse(paths, exclusions)
8
+ regex_exclusions = []
9
+ for exclusion in exclusions
10
+ regex_exclusions << /#{exclusion}/
11
+ end
8
12
  paths.each do |path|
9
13
  Find.find(path) do |f|
10
- exclusions.each do |exclusion|
11
- Find.prune if f =~ /#{exclusion}/
14
+ regex_exclusions.each do |ex|
15
+ Find.prune if ex.match(f)
12
16
  end
13
17
  yield f
14
18
  end
@@ -22,22 +26,28 @@ module Nearline
22
26
 
23
27
  has_and_belongs_to_many :archived_files
24
28
  has_many :logs
29
+ belongs_to :system
25
30
 
26
31
  # Just needed when you create a manifest
27
32
  attr_accessor :backup_paths
28
33
  # Just needed when you create a manifest
29
34
  attr_accessor :backup_exclusions
35
+
36
+ def self.new_for_name(system_name)
37
+ system = System.for_name(system_name)
38
+ system.manifests << m = Nearline::Models::Manifest.new
39
+ system.save!
40
+ m
41
+ end
30
42
 
31
- # Underlying implementation of Nearline.backup
32
- def self.backup(system_name, backup_paths, backup_exclusions)
33
- manifest = self.new(:system_name => system_name)
43
+ def self.backup(system, backup_paths, backup_exclusions)
44
+ manifest = self.new(:system => system)
34
45
  manifest.save!
35
46
 
36
47
  FileFinder.recurse(backup_paths, backup_exclusions) do |file_name|
37
48
  $stdout.write file_name + " "
38
49
  af = ArchivedFile.create_for(file_name, manifest)
39
50
  if (!af.nil?)
40
- manifest.archived_files << af
41
51
  $stdout.write "#{Time.at(af.mtime).asctime}"
42
52
  if (!af.file_content.nil?)
43
53
  $stdout.write" (#{af.file_content.file_size} bytes)"
@@ -51,14 +61,8 @@ module Nearline
51
61
  manifest
52
62
  end
53
63
 
54
- # Find the latest Manifest for a system
55
- def self.latest_for(system_name)
56
- m_result = self.connection.select_one("select id from manifests where system_name='#{system_name}' order by created_at desc")
57
- raise "No manifest found" if m_result.nil?
58
- self.find(m_result["id"])
59
- end
60
64
 
61
- # Find all Manifest entries which have never finished.
65
+ # Find all Manifest entries (across all Systems) which have never finished.
62
66
  #
63
67
  # They are:
64
68
  # * Currently under-way
@@ -67,8 +71,8 @@ module Nearline
67
71
  self.find_all_by_completed_at(nil)
68
72
  end
69
73
 
70
- def self.restore_all_missing(system_name)
71
- manifest = latest_for(system_name)
74
+ def self.restore_all_missing(system, latest_date_time = Time.now)
75
+ manifest = system.latest_manifest_as_of(latest_date_time)
72
76
  manifest.restore_all_missing
73
77
  end
74
78
 
@@ -114,7 +118,7 @@ module Nearline
114
118
  # A simple string reporting the performance of the manifest
115
119
  def summary
116
120
  completed = (completed_at.nil?) ? "DNF" : completed_at
117
- "#{system_name}; started: #{created_at}; finished: #{completed}; " +
121
+ "#{system.name} started: #{created_at}; finished: #{completed}; " +
118
122
  "#{archived_files.size} files; #{logs.size} Errors reported"
119
123
  end
120
124
 
@@ -1,16 +1,15 @@
1
1
  module Nearline
2
2
  module_function
3
3
 
4
- # Every model using an ActiveRecord connection
5
- AR_MODELS = [
6
- Nearline::Models::ArchivedFile,
7
- Nearline::Models::Block,
8
- Nearline::Models::FileContent,
9
- Nearline::Models::Manifest,
10
- Nearline::Models::Sequence,
11
- Nearline::Models::Log
12
- ]
4
+ # VERSION of the software
5
+ VERSION = "0.0.4"
13
6
 
7
+ # Array of every Nearline Model using an ActiveRecord connection
8
+ AR_MODELS = Nearline::Models.constants.map do |m|
9
+ Nearline::Models.const_get(m)
10
+ end.select do |c|
11
+ c.superclass == ActiveRecord::Base
12
+ end
14
13
 
15
14
  # Establishes the ActiveRecord connection
16
15
  #
@@ -32,16 +31,14 @@ module Nearline
32
31
  ActiveRecord::Base.establish_connection(
33
32
  YAML.load_file("config/database.yml")[config]
34
33
  )
35
- end
36
-
37
- if (config.is_a? Hash)
34
+ elsif (config.is_a? Hash)
38
35
  ActiveRecord::Base.establish_connection(config)
39
36
  end
40
37
 
41
38
  unless Nearline::Models::Block.table_exists?
42
39
  Nearline::Models.generate_schema
43
40
  end
44
- Nearline::Models::Block.connected?
41
+ nil
45
42
  end
46
43
 
47
44
  # Establishes a connection only to the Nearline ActiveRecord models
@@ -70,7 +67,7 @@ module Nearline
70
67
  AR_MODELS.each do |m|
71
68
  m.establish_connection(hash)
72
69
  end
73
- Nearline::Models::Block.connected?
70
+ nil
74
71
  end
75
72
 
76
73
  # Performs a backup labeled for system_name,
@@ -82,32 +79,75 @@ module Nearline
82
79
  # been established
83
80
  #
84
81
  # Returns a Manifest for the backup
82
+ #
83
+ # === Examples
84
+ # Backup my laptop, recursing my home folder, skipping .svn folders
85
+ #
86
+ # Nearline.backup('my_laptop','/home/me', '/\\.svn/')
87
+ #
88
+ # Backup my laptop, recurse /home/me and /var/svn
89
+ #
90
+ # Nearline.backup('my_laptop', ['/home/me', '/var/svn'])
91
+ #
85
92
  def backup(system_name, backup_paths,backup_exclusions= [])
86
- Nearline::Models::Manifest.backup(
93
+ unless version_check?
94
+ raise SchemaVersionException.for_version(schema_version)
95
+ end
96
+ Nearline::Models::System.backup(
87
97
  system_name,
88
- string_to_array(backup_paths),
89
- string_to_array(backup_exclusions)
98
+ Utilities.string_to_array(backup_paths),
99
+ Utilities.string_to_array(backup_exclusions)
90
100
  )
91
101
  end
92
-
93
- def string_to_array(x)
94
- if x.is_a? String
95
- return [x]
102
+
103
+ module Utilities
104
+ module_function
105
+ def self.string_to_array(x)
106
+ if x.is_a? String
107
+ return [x]
108
+ end
109
+ x
96
110
  end
97
- x
98
111
  end
99
-
112
+
100
113
  # Restore all missing files from the latest backup
101
- # for system_name
114
+ # for system_name and backed up no later than latest_date_time
102
115
  #
103
- # All updated or existing files are left alone
116
+ # All modified or existing files are left alone
104
117
  #
105
118
  # Expects the Nearline database connection has already
106
119
  # been established
107
120
  #
108
121
  # Returns an Array of paths restored
109
- def restore(system_name)
110
- Nearline::Models::Manifest.restore_all_missing(system_name)
122
+ def restore(system_name, latest_date_time = Time.now)
123
+ unless version_check?
124
+ raise SchemaVersionException.for_version(schema_version)
125
+ end
126
+ Nearline::Models::System.restore_all_missing(system_name, latest_date_time)
127
+ end
128
+
129
+
130
+ # Returns the nearline version of the database
131
+ def schema_version
132
+ begin
133
+ return Nearline::Models::Block.connection.select_value(
134
+ "select version from nearline_version"
135
+ )
136
+ rescue
137
+ return ""
138
+ end
139
+ end
140
+
141
+ # Returns true only if the Nearline version matches the schema
142
+ def version_check?
143
+ Nearline::VERSION == schema_version()
144
+ end
145
+
146
+ class SchemaVersionException < Exception
147
+ def self.for_version(v)
148
+ SchemaVersionException.new("Schema #{v} is not the same "+
149
+ "version as nearline #{Nearline::VERSION}!")
150
+ end
111
151
  end
112
152
 
113
153
  end
@@ -12,6 +12,8 @@ module Nearline
12
12
  drop_table :manifests
13
13
  drop_table :archived_files_manifests
14
14
  drop_table :logs
15
+ drop_table :systems
16
+ drop_table :nearline_version
15
17
  end
16
18
  end
17
19
 
@@ -46,9 +48,15 @@ module Nearline
46
48
  :name => "sequence_jn_index"
47
49
 
48
50
  add_index :sequences, [:block_id]
51
+
52
+ create_table :systems do |t|
53
+ t.column :name, :string, :null => false
54
+ end
55
+
56
+ add_index :systems, [:name], :unique => true
49
57
 
50
58
  create_table :archived_files do |t|
51
- t.column :system_name, :string, :null => false
59
+ t.column :system_id, :integer, :null => false
52
60
  t.column :path, :text, :null => false
53
61
  t.column :path_hash, :string, :null => false, :length => 40
54
62
  t.column :file_content_id, :integer
@@ -63,7 +71,7 @@ module Nearline
63
71
 
64
72
  # Manifests are the reference to a collection of archived files
65
73
  create_table :manifests do |t|
66
- t.column :system_name, :string
74
+ t.column :system_id, :integer
67
75
  t.column :created_at, :datetime
68
76
  t.column :completed_at, :datetime
69
77
  end
@@ -86,6 +94,12 @@ module Nearline
86
94
  t.column :message, :text
87
95
  t.column :created_at, :datetime
88
96
  end
97
+
98
+ create_table :nearline_version, :id => false do |t|
99
+ t.column :version, :string
100
+ end
101
+
102
+ execute "insert into nearline_version (version) values ('#{Nearline::VERSION}')"
89
103
  end
90
104
  end
91
105
 
@@ -0,0 +1,73 @@
1
+ module Nearline
2
+ module Models
3
+
4
+ # The System has the responsibility of identifying
5
+ # what the target was for a backup and relating all
6
+ # Manifests and ArchivedFiles associated with the
7
+ # target system.
8
+ class System < ActiveRecord::Base
9
+
10
+ has_many :manifests
11
+ has_many :archived_files
12
+
13
+
14
+ def self.for_name(system_name)
15
+ system = self.find_by_name(system_name)
16
+ return system unless system.nil?
17
+ system = self.new(:name => system_name)
18
+ system.save!
19
+ system
20
+ end
21
+
22
+ # Find the latest Manifest for a system
23
+ # given the latest_date_time as an upper limit
24
+ def latest_manifest_as_of(latest_date_time = Time.now)
25
+ m_result = Manifest.find(:first,
26
+ :conditions =>
27
+ ["system_id = ? and created_at <= ?",
28
+ self.id, latest_date_time],
29
+ :order => "created_at desc"
30
+ )
31
+ raise "No manifest found" if m_result.nil?
32
+ m_result
33
+ end
34
+
35
+ # Method used by the Nearline module to backup the system
36
+ def self.backup(system_name, backup_paths, backup_exclusions)
37
+ system = self.for_name(system_name)
38
+ system.backup(backup_paths, backup_exclusions)
39
+ end
40
+
41
+ def backup(backup_paths, backup_exclusions)
42
+ Manifest.backup(self, backup_paths, backup_exclusions)
43
+ end
44
+
45
+ # Method used by the Nearline module to restore the system
46
+ def self.restore_all_missing(system_name, latest_date_time)
47
+ system = self.for_name(system_name)
48
+ system.restore_all_missing(latest_date_time)
49
+ end
50
+
51
+ def restore_all_missing(latest_date_time)
52
+ Manifest.restore_all_missing(self, latest_date_time)
53
+ end
54
+
55
+ def archived_file_lookup_hash
56
+ return @lookup_hash if !@lookup_hash.nil?
57
+ @lookup_hash = {}
58
+ for af in self.archived_files
59
+ @lookup_hash[af.path_hash] = af.id
60
+ end
61
+ @lookup_hash
62
+ end
63
+
64
+ def before_destroy
65
+ for manifest in self.manifests
66
+ manifest.destroy
67
+ end
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ end
data/lib/nearline.rb CHANGED
@@ -10,6 +10,7 @@ require 'nearline/schema'
10
10
  # ActiveRecord models
11
11
  require 'nearline/block'
12
12
  require 'nearline/file_content'
13
+ require 'nearline/system'
13
14
  require 'nearline/archived_file'
14
15
  require 'nearline/log'
15
16
  require 'nearline/manifest'
data/tasks/gemspec.rake CHANGED
@@ -1,15 +1,18 @@
1
1
  require 'rake'
2
2
  require 'rake/gempackagetask'
3
3
 
4
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
5
+ require 'nearline'
6
+
4
7
  SPEC = Gem::Specification.new do |s|
5
8
  s.name = "nearline"
6
- s.version = "0.0.3"
9
+ s.version = Nearline::VERSION
7
10
  s.author = "Robert J. Osborne"
8
11
  s.email = "rjo1970@gmail.com"
9
12
  s.summary = "Nearline is a near-line backup and recovery solution"
10
13
  s.description = %{
11
14
  Nearline is a library to make managing near-line file repositories
12
- simple and eleant in pure Ruby.
15
+ simple and elegant in pure Ruby.
13
16
  }
14
17
  s.rubyforge_project = "nearline"
15
18
  s.files = FileList["{tests,lib,doc,tasks}/**/*"].exclude("rdoc").to_a
metadata CHANGED
@@ -3,15 +3,15 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: nearline
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.3
7
- date: 2008-04-07 00:00:00 -04:00
6
+ version: 0.0.4
7
+ date: 2008-04-18 00:00:00 -04:00
8
8
  summary: Nearline is a near-line backup and recovery solution
9
9
  require_paths:
10
10
  - lib
11
11
  email: rjo1970@gmail.com
12
12
  homepage:
13
13
  rubyforge_project: nearline
14
- description: Nearline is a library to make managing near-line file repositories simple and eleant in pure Ruby.
14
+ description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
15
15
  autorequire: nearline
16
16
  default_executable:
17
17
  bindir: bin
@@ -37,6 +37,7 @@ files:
37
37
  - lib/nearline/manifest.rb
38
38
  - lib/nearline/module_methods.rb
39
39
  - lib/nearline/schema.rb
40
+ - lib/nearline/system.rb
40
41
  - lib/nearline.rb
41
42
  - tasks/clean.rake
42
43
  - tasks/gemspec.rake