filey-diff 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ pkg
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gemspec
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Filey diff
2
+
3
+ [![Build Status](https://secure.travis-ci.org/laurilehmijoki/filey-diff.png)]
4
+ (http://travis-ci.org/laurilehmijoki/filey-diff)
5
+
6
+ A Ruby library for comparing file-like objects from various data sources.
7
+
8
+ ## Central concepts
9
+
10
+ ### Filey
11
+
12
+ A file-like object. Can be, for example, a file system file or an AWS S3
13
+ object.
14
+
15
+ ### Data source
16
+
17
+ Provides Fileys.
18
+
19
+ The current built-in data sources support Amazon Web Services S3 and the file
20
+ system.
21
+
22
+ ## Operations
23
+
24
+ ### List outdated files
25
+
26
+ Given two data sources A and B, list the files on B that are older than the corresponding file on A.
27
+
28
+ ### List missing files
29
+
30
+ Given two data sources A and B, list the files that A has but B doesn't.
31
+
32
+ ### List changed files
33
+
34
+ Given two data sources A and B, list the files on B that have a different MD5
35
+ hash than the corresponding file on A.
36
+
37
+ ## Example use cases
38
+
39
+ Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
40
+ only the new post into S3. With the help of Filey diff Arnie can write a Ruby
41
+ program that uploads only the new post and nothing else.
42
+
43
+ ## License
44
+
45
+ Copyright (C) 2012 Lauri Lehmijoki
46
+
47
+ Distributed under the Apache-2.0 license http://www.apache.org/licenses/LICENSE-2.0.html
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ desc "Build the project"
5
+ task :default => 'test'
6
+
7
+ desc "Run tests"
8
+ task :test do
9
+ sh "bundle exec rspec"
10
+ end
@@ -0,0 +1,26 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'filey-diff'
3
+ s.version = '0.0.1'
4
+
5
+ s.summary = "Compare two data sources that contain file-like objects"
6
+ s.description =
7
+ """
8
+ Find missing or outdated files.
9
+ For example, compare your local file system to an AWS S3 bucket.
10
+ """
11
+
12
+ s.authors = ["Lauri Lehmijoki"]
13
+ s.email = 'lauri.lehmijoki@iki.fi'
14
+ s.homepage = 'http://github.com/laurilehmijoki/filey-diff'
15
+
16
+ s.require_paths = %w[lib]
17
+ s.files = `git ls-files`.split("\n")
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+
20
+ if RUBY_VERSION < "1.9"
21
+ s.add_dependency 'require_relative', "~> 1.0.3"
22
+ end
23
+
24
+ s.add_development_dependency 'rake', "~> 0.9"
25
+ s.add_development_dependency 'rspec', "~> 2.11"
26
+ end
@@ -0,0 +1,35 @@
module Filey
  # Compares the fileys provided by two data sources.
  #
  # A data source is any object that responds to +get_fileys+. The items it
  # returns must respond to +full_path+, and to +last_modified+ or +md5+
  # depending on the comparison being made.
  class Comparison
    # Lists the fileys of +data_source_b+ for which +data_source_a+ holds a
    # filey with the same full path and a later +last_modified+ time.
    #
    # Returns an Array of items taken from +data_source_b+.
    def self.list_outdated(data_source_a, data_source_b)
      select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
        b_item.full_path == a_item.full_path &&
          b_item.last_modified < a_item.last_modified
      }
    end

    # Lists the fileys of +data_source_b+ for which +data_source_a+ holds a
    # filey with the same full path but a different +md5+.
    #
    # Returns an Array of items taken from +data_source_b+.
    def self.list_changed(data_source_a, data_source_b)
      select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
        b_item.full_path == a_item.full_path &&
          b_item.md5 != a_item.md5
      }
    end

    # Lists the fileys of +data_source_a+ whose full path does not occur in
    # +data_source_b+.
    #
    # Returns an Array of items taken from +data_source_a+.
    def self.list_missing(data_source_a, data_source_b)
      present_in_both = select_in_outer_array(data_source_a, data_source_b) { |a_item, b_item|
        b_item.full_path == a_item.full_path
      }
      data_source_a.get_fileys - present_in_both
    end

    # Selects the items of +outer+ for which the given block returns a truthy
    # value with at least one item of +inner+. The block receives
    # (outer_item, inner_item); +any?+ stops at the first matching inner item.
    def self.select_in_outer_array(outer, inner)
      outer.get_fileys.select { |outer_item|
        inner.get_fileys.any? { |inner_item|
          yield outer_item, inner_item
        }
      }
    end
    # A bare `private` does not apply to methods defined with `def self.`,
    # so the helper is hidden explicitly.
    private_class_method :select_in_outer_array
  end
end
@@ -0,0 +1,28 @@
module Filey
  module DataSources
    # Data source that turns the objects of an S3 bucket into Fileys.
    #
    # The bucket must respond to +objects+; each object must respond to
    # +key+, +last_modified+ and +etag+.
    class AwsSdkS3 < DataSource
      def initialize(s3_bucket)
        @s3_bucket = s3_bucket
      end

      private

      # Builds one Filey per bucket object. The key is split at its last
      # slash: everything up to and including that slash becomes the path
      # (prefixed with "./"), the rest becomes the name. A key without a
      # slash yields the path "./".
      def do_internal_load
        @s3_bucket.objects.map { |s3_object|
          # rpartition returns ["", "", key] when the key has no slash, so
          # both the nested and the top-level case share one code path.
          directory, separator, name = s3_object.key.rpartition('/')
          Filey.new("./#{directory}#{separator}",
                    name,
                    s3_object.last_modified,
                    # Double quotes are stripped from the etag before it is
                    # used as the md5.
                    s3_object.etag.delete('"'))
        }
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Filey
  module DataSources
    # Base class for data sources.
    #
    # Subclasses supply +do_internal_load+, which returns the collection of
    # fileys; this class only adds memoisation on top of it.
    class DataSource
      # Returns the fileys of this data source.
      #
      # The result of +do_internal_load+ is kept in +@cached+ and reused on
      # later calls. A nil or false result is not cached and triggers a new
      # load on the next call.
      def get_fileys
        @cached ||= do_internal_load
      end
    end
  end
end
@@ -0,0 +1,25 @@
require 'digest/md5'

module Filey
  module DataSources
    # Data source that turns the regular files below a directory into Fileys.
    class FileSystem < DataSource
      def initialize(root_directory)
        @root_directory = root_directory
      end

      private

      # Globs everything below the root directory, keeps the entries that are
      # regular files and builds one Filey per file. The Filey path is the
      # file's directory relative to the root, prefixed with "." and ending
      # in a slash; the md5 is the digest of the file content.
      def do_internal_load
        Dir.glob(@root_directory + '/**/*').select { |file|
          File.file?(file)
        }.map { |file|
          # Every glob result contains a slash because the pattern does, so
          # the split always yields a directory part and a name.
          directory, separator, name = file.rpartition('/')
          relative_directory = "#{directory}#{separator}".sub(@root_directory, '')
          # Digest::MD5.file streams the file in binary mode instead of
          # loading the whole content into memory as a text-mode string.
          md5 = Digest::MD5.file(file).hexdigest
          Filey.new(".#{relative_directory}", name, File.mtime(file), md5)
        }
      end
    end
  end
end
@@ -0,0 +1,36 @@
module Filey
  # A file-like object identified by a directory path and a name.
  class Filey
    attr_reader :path, :name, :last_modified, :md5

    # Errors raised by the constructor. They derive from StandardError so
    # that a plain `rescue` catches them.
    class InvalidTimeError < StandardError
    end

    class InvalidNameError < StandardError
    end

    class InvalidPathError < StandardError
    end

    class InvalidMd5Error < StandardError
    end

    # path          - String that starts with a dot and ends with a slash,
    #                 e.g. "./" or "./ripley/".
    # name          - String without slashes.
    # last_modified - a Time.
    # md5           - String of 32 characters (only the length is checked).
    #
    # Raises InvalidPathError, InvalidTimeError, InvalidNameError or
    # InvalidMd5Error when the respective argument is rejected.
    def initialize(path, name, last_modified, md5)
      # \A and \z anchor the whole string; ^ and $ would also accept a
      # multi-line value in which only one line looks like a valid path.
      raise InvalidPathError unless path.match(/\A\..*\/\z/)
      raise InvalidTimeError unless last_modified.is_a?(Time)
      raise InvalidNameError if name.include?('/')
      raise InvalidMd5Error unless md5.length == 32
      @path = path
      @name = name
      @last_modified = last_modified
      @md5 = md5
    end

    # Returns the path immediately followed by the name.
    def full_path
      @path + @name
    end

    # Orders fileys by their full path.
    def <=>(another)
      full_path <=> another.full_path
    end
  end
end
data/lib/filey-diff.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'require_relative' if RUBY_VERSION < "1.9"
3
+ require_relative 'filey-diff/data-sources/data_source'
4
+ require_relative 'filey-diff/data-sources/aws_sdk_s3'
5
+ require_relative 'filey-diff/data-sources/file_system'
6
+ require_relative 'filey-diff/filey'
7
+ require_relative 'filey-diff/comparison'
@@ -0,0 +1,77 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Filey::Comparison do
4
+ before {
5
+ @scifi = Filey::Filey.new('./', 'scifi.txt', Time.now,
6
+ '9cdfb439c7876e703e307864c9167a15')
7
+ @scifi_changed = Filey::Filey.new('./', 'scifi.txt', Time.now,
8
+ '9cdfb439c7876e703e307864c9167DDD')
9
+ @deep_space = Filey::Filey.new('./', 'abandoned.txt', Time.now,
10
+ '9cdfb439c7876e703e307864c9167a15')
11
+ @outdated_file_object = Filey::Filey.new('./', 'foo.txt', Time.now - 10,
12
+ '9cdfb439c7876e703e307864c9167a15')
13
+ @latest_file_object = Filey::Filey.new('./', 'foo.txt', Time.now,
14
+ '9cdfb439c7876e703e307864c9167a15')
15
+ }
16
+
17
+ context 'finding outdated files' do
18
+ before {
19
+ data_source_a = DataSource.new([ @scifi, @latest_file_object ])
20
+ data_source_b = DataSource.new([ @outdated_file_object, @deep_space ])
21
+ @outdated_file_objects = Filey::Comparison.list_outdated(data_source_a, data_source_b)
22
+ }
23
+
24
+ it 'lists the outdated files when comparing two data sources' do
25
+ @outdated_file_objects.length.should be(1)
26
+ end
27
+
28
+ it 'lists the outdated files when comparing two data sources' do
29
+ @outdated_file_objects.should include(@outdated_file_object)
30
+ end
31
+ end
32
+
33
+ context 'finding missing files' do
34
+ before {
35
+ data_source_a = DataSource.new([ @deep_space ])
36
+ data_source_b = DataSource.new([ @scifi ])
37
+ @missing_file_objects = Filey::Comparison.list_missing(data_source_a, data_source_b)
38
+ }
39
+
40
+ it 'lists the missing files when comparing two data sources' do
41
+ @missing_file_objects.should include(@deep_space)
42
+ end
43
+
44
+ it 'lists the missing files when comparing two data sources' do
45
+ @missing_file_objects.length.should be(1)
46
+ end
47
+ end
48
+
49
+ context 'finding changed files' do
50
+ before {
51
+ data_source_a = DataSource.new([ @scifi ])
52
+ data_source_b = DataSource.new([ @scifi_changed ])
53
+ @changed_files = Filey::Comparison.list_changed(data_source_a, data_source_b)
54
+ }
55
+
56
+ it 'compares filey md5 hashes' do
57
+ @changed_files.should include(@scifi_changed)
58
+ end
59
+
60
+ it 'compares filey md5 hashes' do
61
+ @changed_files.length.should be(1)
62
+ end
63
+
64
+ context 'same md5 hashes' do
65
+ before {
66
+ data_source_a = DataSource.new([ @scifi ])
67
+ data_source_b = DataSource.new([ @scifi ])
68
+ @changed_files = Filey::Comparison.list_changed(data_source_a, data_source_b)
69
+ }
70
+
71
+ it 'notices when the md5 hashes are same' do
72
+ @changed_files.length.should be(0)
73
+ end
74
+ end
75
+ end
76
+
77
+ end
@@ -0,0 +1,65 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ shared_examples "a data source" do |source|
4
+ let(:data_source) { described_class.new(source) }
5
+
6
+ it 'normalises the objects into Fileys' do
7
+ filey = data_source.get_fileys.sort[0]
8
+ filey.path.should eq('./cameron/80s/')
9
+ filey.name.should eq('aliens.txt')
10
+ end
11
+
12
+ it 'provides an md5 hash of the filey content' do
13
+ filey = data_source.get_fileys.sort[0]
14
+ filey.md5.should eq(Digest::MD5.hexdigest('Hudson'))
15
+ end
16
+
17
+ it 'normalises the objects into Fileys' do
18
+ filey = data_source.get_fileys.sort[1]
19
+ filey.path.should eq('./cameron/90s/')
20
+ filey.name.should eq('t2.txt')
21
+ end
22
+
23
+ it 'normalises the objects into Fileys' do
24
+ filey = data_source.get_fileys.sort[2]
25
+ filey.path.should eq('./')
26
+ filey.name.should eq('movies.txt')
27
+ end
28
+
29
+ it 'normalises the objects into Fileys' do
30
+ data_source.get_fileys.each { |file_object|
31
+ file_object.should be_an_instance_of(Filey::Filey)
32
+ }
33
+ end
34
+ end
35
+
36
+ objects = [
37
+ { :path => 'cameron/80s/aliens.txt', :mtime => Time.now,
38
+ :content => 'Hudson' },
39
+ { :path => 'cameron/90s/t2.txt', :mtime => Time.now,
40
+ :content => 't1000' },
41
+ { :path => 'movies.txt', :mtime => Time.now,
42
+ :content => 'foo' }
43
+ ]
44
+
45
+ describe Filey::DataSources::AwsSdkS3 do
46
+ s3_bucket = S3Bucket.new(
47
+ objects.map { |object|
48
+ S3Object.new(object[:path], object[:mtime], object[:content])
49
+ }
50
+ )
51
+ it_should_behave_like "a data source", s3_bucket
52
+ end
53
+
54
+ describe Filey::DataSources::FileSystem do
55
+ require 'tmpdir'
56
+ @directory = Dir.mktmpdir
57
+ objects.each { |object|
58
+ fs_path = "#{@directory}/#{object[:path]}"
59
+ FileUtils.mkdir_p(fs_path.scan(/(.*\/)/).first.first)
60
+ File.open(fs_path, 'w') do |file|
61
+ file.write object[:content]
62
+ end
63
+ }
64
+ it_should_behave_like "a data source", @directory
65
+ end
@@ -0,0 +1,81 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Filey::Filey do
4
+ context 'path validation' do
5
+ it 'requires path to start with a dot and end with a slash' do
6
+ expect {
7
+ Filey::Filey.new('', 'aliens.txt', Time.now,
8
+ '9cdfb439c7876e703e307864c9167a15')
9
+ }.to raise_error(Filey::Filey::InvalidPathError)
10
+ end
11
+
12
+ it 'requires path to start with a dot and end with a slash' do
13
+ expect {
14
+ Filey::Filey.new('.', 'aliens.txt', Time.now,
15
+ '9cdfb439c7876e703e307864c9167a15')
16
+ }.to raise_error(Filey::Filey::InvalidPathError)
17
+ end
18
+
19
+ it 'requires path to start with a dot and end with a slash' do
20
+ expect {
21
+ Filey::Filey.new('/', 'aliens.txt', Time.now,
22
+ '9cdfb439c7876e703e307864c9167a15')
23
+ }.to raise_error(Filey::Filey::InvalidPathError)
24
+ end
25
+
26
+ it 'requires path to start with a dot and end with a slash' do
27
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
28
+ '9cdfb439c7876e703e307864c9167a15')
29
+ end
30
+
31
+ it 'requires path to start with a dot and end with a slash' do
32
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
33
+ '9cdfb439c7876e703e307864c9167a15')
34
+ end
35
+ end
36
+
37
+ context 'time validation' do
38
+ it 'requires the last_modified property to be an instance of Time' do
39
+ expect {
40
+ Filey::Filey.new('./', 'aliens.txt', '',
41
+ '9cdfb439c7876e703e307864c9167a15')
42
+ }.to raise_error(Filey::Filey::InvalidTimeError)
43
+ end
44
+
45
+ it 'requires the last_modified property to be an instance of Time' do
46
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
47
+ '9cdfb439c7876e703e307864c9167a15')
48
+ end
49
+ end
50
+
51
+ context 'name validation' do
52
+ it 'requires the name not to contain slashes' do
53
+ expect {
54
+ Filey::Filey.new('./', '/aliens.txt', Time.now,
55
+ '9cdfb439c7876e703e307864c9167a15')
56
+ }.to raise_error(Filey::Filey::InvalidNameError)
57
+ end
58
+
59
+ it 'requires the name not to contain slashes' do
60
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
61
+ '9cdfb439c7876e703e307864c9167a15')
62
+ end
63
+ end
64
+
65
+ context 'path with name' do
66
+ it 'can concatenate the path and name' do
67
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
68
+ '9cdfb439c7876e703e307864c9167a15').
69
+ full_path.should eq('./ripley/aliens.txt')
70
+ end
71
+ end
72
+
73
+ context 'md5 validation' do
74
+ it 'raises an error if the md5 hash is not valid' do
75
+ expect {
76
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
77
+ 'i-am-not-a-valid-md5-hash')
78
+ }.to raise_error(Filey::Filey::InvalidMd5Error)
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,35 @@
1
+ require 'rspec'
2
+ require 'digest/md5'
3
+ require File.dirname(__FILE__) + '/../lib/filey-diff'
4
+
5
+ class S3Object
6
+ attr_reader :key, :last_modified
7
+
8
+ def initialize(key, last_modified, content)
9
+ @key = key
10
+ @last_modified = last_modified
11
+ @content = content
12
+ end
13
+
14
+ def etag
15
+ Digest::MD5.hexdigest(@content)
16
+ end
17
+ end
18
+
19
+ class S3Bucket
20
+ attr_reader :objects
21
+
22
+ def initialize(s3_objects)
23
+ @objects = s3_objects
24
+ end
25
+ end
26
+
27
+ class DataSource
28
+ def initialize(file_objects)
29
+ @file_objects = file_objects
30
+ end
31
+
32
+ def get_fileys
33
+ @file_objects
34
+ end
35
+ end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filey-diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Lauri Lehmijoki
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '0.9'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.9'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2.11'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2.11'
46
+ description: ! "\n Find missing or outdated files.\n For example, compare your
47
+ local file system to an AWS S3 bucket.\n "
48
+ email: lauri.lehmijoki@iki.fi
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .travis.yml
55
+ - Gemfile
56
+ - README.md
57
+ - Rakefile
58
+ - filey-diff.gemspec
59
+ - lib/filey-diff.rb
60
+ - lib/filey-diff/comparison.rb
61
+ - lib/filey-diff/data-sources/aws_sdk_s3.rb
62
+ - lib/filey-diff/data-sources/data_source.rb
63
+ - lib/filey-diff/data-sources/file_system.rb
64
+ - lib/filey-diff/filey.rb
65
+ - spec/comparison_spec.rb
66
+ - spec/data_sources_spec.rb
67
+ - spec/filey_spec.rb
68
+ - spec/spec_helper.rb
69
+ homepage: http://github.com/laurilehmijoki/filey-diff
70
+ licenses: []
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ segments:
82
+ - 0
83
+ hash: 4129900562195660113
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ segments:
91
+ - 0
92
+ hash: 4129900562195660113
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 1.8.24
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: Compare two data sources that contain file-like objects
99
+ test_files:
100
+ - spec/comparison_spec.rb
101
+ - spec/data_sources_spec.rb
102
+ - spec/filey_spec.rb
103
+ - spec/spec_helper.rb