nearline 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,9 @@ module Nearline
7
7
  require 'fileutils'
8
8
 
9
9
  belongs_to :file_content
10
+ belongs_to :system
10
11
  has_and_belongs_to_many :manifests
12
+
11
13
 
12
14
  def self.create_for(file_path, manifest)
13
15
  file_information = FileInformation.new(file_path, manifest)
@@ -16,8 +18,14 @@ module Nearline
16
18
  return nil if file_information.path_hash.nil?
17
19
 
18
20
  # If we find an existing entry, use it
19
- hit = self.find_by_path_hash(file_information.path_hash)
20
- return hit unless hit.nil?
21
+ hash = manifest.system.archived_file_lookup_hash
22
+ hit = hash[file_information.path_hash]
23
+
24
+ unless hit.nil?
25
+ af = ArchivedFile.find(hit)
26
+ manifest.archived_files << af
27
+ return af
28
+ end
21
29
 
22
30
  # We need to create a record for either a directory or file
23
31
  archived_file = ArchivedFile.new(
@@ -27,14 +35,16 @@ module Nearline
27
35
  # Find a new directory
28
36
  if (file_information.is_directory)
29
37
  archived_file.save!
38
+ manifest.archived_files << archived_file
30
39
  return archived_file
31
40
  end
32
41
 
33
42
  # Find a new file that needs to be persisted
34
43
  archived_file.file_content.file_size =
35
44
  [file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
36
- archived_file.persist(manifest)
37
- archived_file.save!
45
+ archived_file = archived_file.persist(manifest)
46
+ archived_file.save! unless archived_file.nil?
47
+ manifest.archived_files << archived_file
38
48
  archived_file
39
49
 
40
50
  # TODO: Symbolic links, block devices, ...?
@@ -63,7 +73,7 @@ module Nearline
63
73
 
64
74
  def generate_path_hash
65
75
  return nil if @stat.nil?
66
- target = [@manifest.system_name,
76
+ target = [@manifest.system.name,
67
77
  @file_path,
68
78
  @stat.uid,
69
79
  @stat.gid,
@@ -73,14 +83,14 @@ module Nearline
73
83
  end
74
84
 
75
85
  def file_content_entry_for_files_only
76
- return FileContent.fresh_entry unless @is_directory
86
+ return FileContent.new unless @is_directory
77
87
  return nil
78
88
  end
79
89
 
80
90
  def build_parameters
81
91
  return nil if @stat.nil?
82
92
  {
83
- :system_name => @manifest.system_name,
93
+ :system => @manifest.system,
84
94
  :path => @file_path,
85
95
  :path_hash => @path_hash,
86
96
  :file_content => file_content_entry_for_files_only,
@@ -190,19 +200,21 @@ module Nearline
190
200
  manifest.add_log "recorded file length #{file_size} " +
191
201
  "does not match #{self.file_content.file_size} " +
192
202
  "reported by the file system on path: #{self.path}"
203
+ self.file_content.file_size = file_size
193
204
  end
194
205
  end
195
206
 
196
207
  def verify_content(manifest)
197
208
  unless (self.file_content.verified?)
198
- manifest.add_log "failed verification on path: #{self.path}"
209
+ manifest.add_log "file dropped on failed verification on path: #{self.path}"
210
+ self.file_content.orphan_check
211
+ self.destroy
199
212
  end
200
213
  end
201
214
 
202
215
  def unique_sequence_processed?(key,manifest)
203
216
  if self.file_content.unique_fingerprint?(key)
204
217
  self.file_content.fingerprint = key
205
- self.file_content.save!
206
218
  self.save!
207
219
  verify_content(manifest)
208
220
  return true
@@ -211,7 +223,6 @@ module Nearline
211
223
  end
212
224
 
213
225
  def clean_up_duplicate_content
214
- Sequence.delete_all("file_content_id=#{self.file_content.id}")
215
226
  self.file_content.orphan_check
216
227
  end
217
228
 
@@ -4,9 +4,9 @@ module Nearline
4
4
  module Models
5
5
 
6
6
  # Represents a unit of file content which may be
7
- # freely shared across the repository
7
+ # freely shared across the repository.
8
8
  # Its sole responsibility is to preserve and provide
9
- # content access
9
+ # content access.
10
10
  class Block < ActiveRecord::Base
11
11
  require "zlib"
12
12
 
@@ -37,13 +37,22 @@ module Nearline
37
37
  end
38
38
  @content = self.bulk_content
39
39
  end
40
-
41
- def self.for_content(x, old_block = nil)
42
- unless old_block.nil?
43
- if x == old_block.content
44
- return old_block
45
- end
40
+
41
+ def self.id_for_content(x)
42
+ block = Block.new(:bulk_content => x)
43
+ block.calculate_fingerprint
44
+ hit = Block.connection.select_one(
45
+ "select id from blocks where fingerprint='#{block.fingerprint}'"
46
+ )
47
+ unless hit.nil?
48
+ return hit['id']
46
49
  end
50
+ block.attempt_compression
51
+ block.save!
52
+ block.id
53
+ end
54
+
55
+ def self.for_content(x)
47
56
  block = Models::Block.new(:bulk_content => x)
48
57
  block.calculate_fingerprint
49
58
  found = find_by_fingerprint(block.fingerprint)
@@ -1,23 +1,16 @@
1
1
  module Nearline
2
2
  module Models
3
3
 
4
- # Has the responsibility of identifying and
4
+ # Has the responsibility of identifying, restoring and
5
5
  # verifying content
6
6
  class FileContent < ActiveRecord::Base
7
7
  has_many :sequences
8
8
  has_many :archived_files
9
-
10
- def self.fresh_entry
11
- file_content = FileContent.new
12
- file_content.save!
13
- file_content
14
- end
15
9
 
16
10
  def orphan_check
17
- if (self.archived_files.size == 1)
11
+ if (self.archived_files.size < 2)
18
12
  sequences.each do |s|
19
13
  s.destroy
20
- s.block.orphan_check
21
14
  end
22
15
  self.destroy
23
16
  end
@@ -62,21 +55,26 @@ module Nearline
62
55
  class Sequence < ActiveRecord::Base
63
56
  belongs_to :block
64
57
  belongs_to :file_content
58
+
59
+ def after_destroy
60
+ block.orphan_check
61
+ end
65
62
  end
66
63
 
67
64
  class FileSequencer
68
65
  def initialize(file_content)
69
66
  @inc = 0
70
67
  @file_content = file_content
68
+ @file_content.save!
71
69
  end
72
70
 
73
71
  def preserve_content(content)
74
72
  @inc += 1
75
- block = Block.for_content(content)
73
+ block_id = Block.id_for_content(content)
76
74
  sequence = Sequence.new(
77
75
  :sequence => @inc,
78
76
  :file_content_id => @file_content.id,
79
- :block_id => block.id
77
+ :block_id => block_id
80
78
  )
81
79
  sequence.save!
82
80
  sequence
@@ -5,10 +5,14 @@ module Nearline
5
5
  class FileFinder
6
6
  require 'find'
7
7
  def self.recurse(paths, exclusions)
8
+ regex_exclusions = []
9
+ for exclusion in exclusions
10
+ regex_exclusions << /#{exclusion}/
11
+ end
8
12
  paths.each do |path|
9
13
  Find.find(path) do |f|
10
- exclusions.each do |exclusion|
11
- Find.prune if f =~ /#{exclusion}/
14
+ regex_exclusions.each do |ex|
15
+ Find.prune if ex.match(f)
12
16
  end
13
17
  yield f
14
18
  end
@@ -22,22 +26,28 @@ module Nearline
22
26
 
23
27
  has_and_belongs_to_many :archived_files
24
28
  has_many :logs
29
+ belongs_to :system
25
30
 
26
31
  # Just needed when you create a manifest
27
32
  attr_accessor :backup_paths
28
33
  # Just needed when you create a manifest
29
34
  attr_accessor :backup_exclusions
35
+
36
+ def self.new_for_name(system_name)
37
+ system = System.for_name(system_name)
38
+ system.manifests << m = Nearline::Models::Manifest.new
39
+ system.save!
40
+ m
41
+ end
30
42
 
31
- # Underlying implementation of Nearline.backup
32
- def self.backup(system_name, backup_paths, backup_exclusions)
33
- manifest = self.new(:system_name => system_name)
43
+ def self.backup(system, backup_paths, backup_exclusions)
44
+ manifest = self.new(:system => system)
34
45
  manifest.save!
35
46
 
36
47
  FileFinder.recurse(backup_paths, backup_exclusions) do |file_name|
37
48
  $stdout.write file_name + " "
38
49
  af = ArchivedFile.create_for(file_name, manifest)
39
50
  if (!af.nil?)
40
- manifest.archived_files << af
41
51
  $stdout.write "#{Time.at(af.mtime).asctime}"
42
52
  if (!af.file_content.nil?)
43
53
  $stdout.write" (#{af.file_content.file_size} bytes)"
@@ -51,14 +61,8 @@ module Nearline
51
61
  manifest
52
62
  end
53
63
 
54
- # Find the latest Manifest for a system
55
- def self.latest_for(system_name)
56
- m_result = self.connection.select_one("select id from manifests where system_name='#{system_name}' order by created_at desc")
57
- raise "No manifest found" if m_result.nil?
58
- self.find(m_result["id"])
59
- end
60
64
 
61
- # Find all Manifest entries which have never finished.
65
+ # Find all Manifest entries (across all Systems) which have never finished.
62
66
  #
63
67
  # They are:
64
68
  # * Currently under-way
@@ -67,8 +71,8 @@ module Nearline
67
71
  self.find_all_by_completed_at(nil)
68
72
  end
69
73
 
70
- def self.restore_all_missing(system_name)
71
- manifest = latest_for(system_name)
74
+ def self.restore_all_missing(system, latest_date_time = Time.now)
75
+ manifest = system.latest_manifest_as_of(latest_date_time)
72
76
  manifest.restore_all_missing
73
77
  end
74
78
 
@@ -114,7 +118,7 @@ module Nearline
114
118
  # A simple string reporting the performance of the manifest
115
119
  def summary
116
120
  completed = (completed_at.nil?) ? "DNF" : completed_at
117
- "#{system_name}; started: #{created_at}; finished: #{completed}; " +
121
+ "#{system.name} started: #{created_at}; finished: #{completed}; " +
118
122
  "#{archived_files.size} files; #{logs.size} Errors reported"
119
123
  end
120
124
 
@@ -1,16 +1,15 @@
1
1
  module Nearline
2
2
  module_function
3
3
 
4
- # Every model using an ActiveRecord connection
5
- AR_MODELS = [
6
- Nearline::Models::ArchivedFile,
7
- Nearline::Models::Block,
8
- Nearline::Models::FileContent,
9
- Nearline::Models::Manifest,
10
- Nearline::Models::Sequence,
11
- Nearline::Models::Log
12
- ]
4
+ # VERSION of the software
5
+ VERSION = "0.0.4"
13
6
 
7
+ # Array of every Nearline Model using an ActiveRecord connection
8
+ AR_MODELS = Nearline::Models.constants.map do |m|
9
+ Nearline::Models.const_get(m)
10
+ end.select do |c|
11
+ c.superclass == ActiveRecord::Base
12
+ end
14
13
 
15
14
  # Establishes the ActiveRecord connection
16
15
  #
@@ -32,16 +31,14 @@ module Nearline
32
31
  ActiveRecord::Base.establish_connection(
33
32
  YAML.load_file("config/database.yml")[config]
34
33
  )
35
- end
36
-
37
- if (config.is_a? Hash)
34
+ elsif (config.is_a? Hash)
38
35
  ActiveRecord::Base.establish_connection(config)
39
36
  end
40
37
 
41
38
  unless Nearline::Models::Block.table_exists?
42
39
  Nearline::Models.generate_schema
43
40
  end
44
- Nearline::Models::Block.connected?
41
+ nil
45
42
  end
46
43
 
47
44
  # Establishes a connection only to the Nearline ActiveRecord models
@@ -70,7 +67,7 @@ module Nearline
70
67
  AR_MODELS.each do |m|
71
68
  m.establish_connection(hash)
72
69
  end
73
- Nearline::Models::Block.connected?
70
+ nil
74
71
  end
75
72
 
76
73
  # Performs a backup labeled for system_name,
@@ -82,32 +79,75 @@ module Nearline
82
79
  # been established
83
80
  #
84
81
  # Returns a Manifest for the backup
82
+ #
83
+ # === Examples
84
+ # Backup my laptop, recursing my home folder, skipping .svn folders
85
+ #
86
+ # Nearline.backup('my_laptop','/home/me', '/\\.svn/')
87
+ #
88
+ # Backup my laptop, recurse /home/me and /var/svn
89
+ #
90
+ # Nearline.backup('my_laptop', ['/home/me', '/var/svn'])
91
+ #
85
92
  def backup(system_name, backup_paths,backup_exclusions= [])
86
- Nearline::Models::Manifest.backup(
93
+ unless version_check?
94
+ raise SchemaVersionException.for_version(schema_version)
95
+ end
96
+ Nearline::Models::System.backup(
87
97
  system_name,
88
- string_to_array(backup_paths),
89
- string_to_array(backup_exclusions)
98
+ Utilities.string_to_array(backup_paths),
99
+ Utilities.string_to_array(backup_exclusions)
90
100
  )
91
101
  end
92
-
93
- def string_to_array(x)
94
- if x.is_a? String
95
- return [x]
102
+
103
+ module Utilities
104
+ module_function
105
+ def self.string_to_array(x)
106
+ if x.is_a? String
107
+ return [x]
108
+ end
109
+ x
96
110
  end
97
- x
98
111
  end
99
-
112
+
100
113
  # Restore all missing files from the latest backup
101
- # for system_name
114
+ # for system_name and backed up no later than latest_date_time
102
115
  #
103
- # All updated or existing files are left alone
116
+ # All modified or existing files are left alone
104
117
  #
105
118
  # Expects the Nearline database connection has already
106
119
  # been established
107
120
  #
108
121
  # Returns an Array of paths restored
109
- def restore(system_name)
110
- Nearline::Models::Manifest.restore_all_missing(system_name)
122
+ def restore(system_name, latest_date_time = Time.now)
123
+ unless version_check?
124
+ raise SchemaVersionException.for_version(schema_version)
125
+ end
126
+ Nearline::Models::System.restore_all_missing(system_name, latest_date_time)
127
+ end
128
+
129
+
130
+ # Returns the nearline version of the database
131
+ def schema_version
132
+ begin
133
+ return Nearline::Models::Block.connection.select_value(
134
+ "select version from nearline_version"
135
+ )
136
+ rescue
137
+ return ""
138
+ end
139
+ end
140
+
141
+ # Returns true only if the Nearline version matches the schema
142
+ def version_check?
143
+ Nearline::VERSION == schema_version()
144
+ end
145
+
146
+ class SchemaVersionException < Exception
147
+ def self.for_version(v)
148
+ SchemaVersionException.new("Schema #{v} is not the same "+
149
+ "version as nearline #{Nearline::VERSION}!")
150
+ end
111
151
  end
112
152
 
113
153
  end
@@ -12,6 +12,8 @@ module Nearline
12
12
  drop_table :manifests
13
13
  drop_table :archived_files_manifests
14
14
  drop_table :logs
15
+ drop_table :systems
16
+ drop_table :nearline_version
15
17
  end
16
18
  end
17
19
 
@@ -46,9 +48,15 @@ module Nearline
46
48
  :name => "sequence_jn_index"
47
49
 
48
50
  add_index :sequences, [:block_id]
51
+
52
+ create_table :systems do |t|
53
+ t.column :name, :string, :null => false
54
+ end
55
+
56
+ add_index :systems, [:name], :unique => true
49
57
 
50
58
  create_table :archived_files do |t|
51
- t.column :system_name, :string, :null => false
59
+ t.column :system_id, :integer, :null => false
52
60
  t.column :path, :text, :null => false
53
61
  t.column :path_hash, :string, :null => false, :length => 40
54
62
  t.column :file_content_id, :integer
@@ -63,7 +71,7 @@ module Nearline
63
71
 
64
72
  # Manifests are the reference to a collection of archived files
65
73
  create_table :manifests do |t|
66
- t.column :system_name, :string
74
+ t.column :system_id, :integer
67
75
  t.column :created_at, :datetime
68
76
  t.column :completed_at, :datetime
69
77
  end
@@ -86,6 +94,12 @@ module Nearline
86
94
  t.column :message, :text
87
95
  t.column :created_at, :datetime
88
96
  end
97
+
98
+ create_table :nearline_version, :id => false do |t|
99
+ t.column :version, :string
100
+ end
101
+
102
+ execute "insert into nearline_version (version) values ('#{Nearline::VERSION}')"
89
103
  end
90
104
  end
91
105
 
@@ -0,0 +1,73 @@
1
+ module Nearline
2
+ module Models
3
+
4
+ # The System has the responsibility of identifying
5
+ # what the target was for a backup and relating all
6
+ # Manifests and ArchivedFiles associated with the
7
+ # target system.
8
+ class System < ActiveRecord::Base
9
+
10
+ has_many :manifests
11
+ has_many :archived_files
12
+
13
+
14
+ def self.for_name(system_name)
15
+ system = self.find_by_name(system_name)
16
+ return system unless system.nil?
17
+ system = self.new(:name => system_name)
18
+ system.save!
19
+ system
20
+ end
21
+
22
+ # Find the latest Manifest for a system
23
+ # given the latest_date_time as an upper limit
24
+ def latest_manifest_as_of(latest_date_time = Time.now)
25
+ m_result = Manifest.find(:first,
26
+ :conditions =>
27
+ ["system_id = ? and created_at <= ?",
28
+ self.id, latest_date_time],
29
+ :order => "created_at desc"
30
+ )
31
+ raise "No manifest found" if m_result.nil?
32
+ m_result
33
+ end
34
+
35
+ # Method used by the Nearline module to backup the system
36
+ def self.backup(system_name, backup_paths, backup_exclusions)
37
+ system = self.for_name(system_name)
38
+ system.backup(backup_paths, backup_exclusions)
39
+ end
40
+
41
+ def backup(backup_paths, backup_exclusions)
42
+ Manifest.backup(self, backup_paths, backup_exclusions)
43
+ end
44
+
45
+ # Method used by the Nearline module to restore the system
46
+ def self.restore_all_missing(system_name, latest_date_time)
47
+ system = self.for_name(system_name)
48
+ system.restore_all_missing(latest_date_time)
49
+ end
50
+
51
+ def restore_all_missing(latest_date_time)
52
+ Manifest.restore_all_missing(self, latest_date_time)
53
+ end
54
+
55
+ def archived_file_lookup_hash
56
+ return @lookup_hash if !@lookup_hash.nil?
57
+ @lookup_hash = {}
58
+ for af in self.archived_files
59
+ @lookup_hash[af.path_hash] = af.id
60
+ end
61
+ @lookup_hash
62
+ end
63
+
64
+ def before_destroy
65
+ for manifest in self.manifests
66
+ manifest.destroy
67
+ end
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ end
data/lib/nearline.rb CHANGED
@@ -10,6 +10,7 @@ require 'nearline/schema'
10
10
  # ActiveRecord models
11
11
  require 'nearline/block'
12
12
  require 'nearline/file_content'
13
+ require 'nearline/system'
13
14
  require 'nearline/archived_file'
14
15
  require 'nearline/log'
15
16
  require 'nearline/manifest'
data/tasks/gemspec.rake CHANGED
@@ -1,15 +1,18 @@
1
1
  require 'rake'
2
2
  require 'rake/gempackagetask'
3
3
 
4
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
5
+ require 'nearline'
6
+
4
7
  SPEC = Gem::Specification.new do |s|
5
8
  s.name = "nearline"
6
- s.version = "0.0.3"
9
+ s.version = Nearline::VERSION
7
10
  s.author = "Robert J. Osborne"
8
11
  s.email = "rjo1970@gmail.com"
9
12
  s.summary = "Nearline is a near-line backup and recovery solution"
10
13
  s.description = %{
11
14
  Nearline is a library to make managing near-line file repositories
12
- simple and eleant in pure Ruby.
15
+ simple and elegant in pure Ruby.
13
16
  }
14
17
  s.rubyforge_project = "nearline"
15
18
  s.files = FileList["{tests,lib,doc,tasks}/**/*"].exclude("rdoc").to_a
metadata CHANGED
@@ -3,15 +3,15 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: nearline
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.3
7
- date: 2008-04-07 00:00:00 -04:00
6
+ version: 0.0.4
7
+ date: 2008-04-18 00:00:00 -04:00
8
8
  summary: Nearline is a near-line backup and recovery solution
9
9
  require_paths:
10
10
  - lib
11
11
  email: rjo1970@gmail.com
12
12
  homepage:
13
13
  rubyforge_project: nearline
14
- description: Nearline is a library to make managing near-line file repositories simple and eleant in pure Ruby.
14
+ description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
15
15
  autorequire: nearline
16
16
  default_executable:
17
17
  bindir: bin
@@ -37,6 +37,7 @@ files:
37
37
  - lib/nearline/manifest.rb
38
38
  - lib/nearline/module_methods.rb
39
39
  - lib/nearline/schema.rb
40
+ - lib/nearline/system.rb
40
41
  - lib/nearline.rb
41
42
  - tasks/clean.rake
42
43
  - tasks/gemspec.rake