filey-diff 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ pkg
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gemspec
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Filey diff
2
+
3
+ [![Build Status](https://secure.travis-ci.org/laurilehmijoki/filey-diff.png)]
4
+ (http://travis-ci.org/laurilehmijoki/filey-diff)
5
+
6
+ A Ruby library for comparing file-like objects from various data sources.
7
+
8
+ ## Central concepts
9
+
10
+ ### Filey
11
+
12
+ A file-like object. Can be, for example, a file system file or an AWS S3
13
+ object.
14
+
15
+ ### Data source
16
+
17
+ Provides Fileys.
18
+
19
+ The current built-in data sources support Amazon Web Services S3 and the file
20
+ system.
21
+
22
+ ## Operations
23
+
24
+ ### List outdated files
25
+
26
+ Given two data sources A and B, list the files on B that are older than the corresponding file on A.
27
+
28
+ ### List missing files
29
+
30
+ Given two data sources A and B, list the files that A has but B doesn't.
31
+
32
+ ### List changed files
33
+
34
+ Given two data sources A and B, list the files on A that have a different MD5
35
+ hash than the corresponding file on B.
36
+
37
+ ## Example use cases
38
+
39
+ Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
40
+ only the new post into S3. With the help of Filey diff Arnie can write a Ruby
41
+ program that uploads only the new post and nothing else.
42
+
43
+ ## License
44
+
45
+ Copyright (C) 2012 Lauri Lehmijoki
46
+
47
+ Distributed under the Apache-2.0 license http://www.apache.org/licenses/LICENSE-2.0.html
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ desc "Build the project"
5
+ task :default => 'test'
6
+
7
+ desc "Run tests"
8
+ task :test do
9
+ sh "bundle exec rspec"
10
+ end
@@ -0,0 +1,26 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'filey-diff'
3
+ s.version = '0.0.1'
4
+
5
+ s.summary = "Compare two data sources that contain file-like objects"
6
+ s.description =
7
+ """
8
+ Find missing or outdated files.
9
+ For example, compare your local file system to an AWS S3 bucket.
10
+ """
11
+
12
+ s.authors = ["Lauri Lehmijoki"]
13
+ s.email = 'lauri.lehmijoki@iki.fi'
14
+ s.homepage = 'http://github.com/laurilehmijoki/filey-diff'
15
+
16
+ s.require_paths = %w[lib]
17
+ s.files = `git ls-files`.split("\n")
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+
20
+ if RUBY_VERSION < "1.9"
21
+ s.add_dependency 'require_relative', "~> 1.0.3"
22
+ end
23
+
24
+ s.add_development_dependency 'rake', "~> 0.9"
25
+ s.add_development_dependency 'rspec', "~> 2.11"
26
+ end
@@ -0,0 +1,35 @@
1
+ module Filey
2
+ class Comparison
3
+ def self.list_outdated(data_source_a, data_source_b)
4
+ select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
5
+ b_item.full_path == a_item.full_path and
6
+ b_item.last_modified < a_item.last_modified
7
+ }
8
+ end
9
+
10
+ def self.list_changed(data_source_a, data_source_b)
11
+ select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
12
+ b_item.full_path == a_item.full_path and
13
+ b_item.md5 != a_item.md5
14
+ }
15
+ end
16
+
17
+ def self.list_missing(data_source_a, data_source_b)
18
+ intersection = select_in_outer_array(data_source_a, data_source_b) do
19
+ |a_item, b_item|
20
+ b_item.full_path == a_item.full_path
21
+ end
22
+ data_source_a.get_fileys - intersection
23
+ end
24
+
25
+ private
26
+
27
+ def self.select_in_outer_array(outer, inner)
28
+ outer.get_fileys.select { |outer_item|
29
+ inner.get_fileys.select { |inner_item|
30
+ yield outer_item, inner_item
31
+ }.length > 0
32
+ }
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,28 @@
1
+ module Filey
2
+ module DataSources
3
+ class AwsSdkS3 < DataSource
4
+ def initialize(s3_bucket)
5
+ @s3_bucket = s3_bucket
6
+ end
7
+
8
+ private
9
+
10
+ def do_internal_load
11
+ @s3_bucket.objects.map { |s3_object|
12
+ if (s3_object.key.include?'/')
13
+ path = s3_object.key.scan(/(.*\/).*/).first.first
14
+ name = s3_object.key.scan(/.*\/(.*)/).first.first
15
+ else
16
+ path = ''
17
+ name = s3_object.key
18
+ end
19
+ normalised_path = "./#{path}"
20
+ Filey.new(normalised_path,
21
+ name,
22
+ s3_object.last_modified,
23
+ s3_object.etag.gsub(/"/, ''))
24
+ }
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,13 @@
1
+ module Filey
2
+ module DataSources
3
+ class DataSource
4
+ def get_fileys
5
+ if @cached
6
+ @cached
7
+ else
8
+ @cached = do_internal_load
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,25 @@
1
+ require 'digest/md5'
2
+
3
+ module Filey
4
+ module DataSources
5
+ class FileSystem < DataSource
6
+ def initialize(root_directory)
7
+ @root_directory = root_directory
8
+ end
9
+
10
+ private
11
+
12
+ def do_internal_load
13
+ Dir.glob(@root_directory + '/**/*').select { |file|
14
+ File.file?(file)
15
+ }.map { |file|
16
+ path = file.scan(/(.*\/).*/).first.first.sub(@root_directory, '')
17
+ name = file.scan(/.*\/(.*)/).first.first
18
+ normalised_path = ".#{path}"
19
+ md5 = Digest::MD5.hexdigest(File.read(file))
20
+ Filey.new(normalised_path, name, File.mtime(file), md5)
21
+ }
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,36 @@
1
+ module Filey
2
+ class Filey
3
+ attr_reader :path, :name, :last_modified, :md5
4
+
5
+ def initialize(path, name, last_modified, md5)
6
+ raise InvalidPathError unless path.match(/^\..*\/$/)
7
+ raise InvalidTimeError unless last_modified.instance_of?(Time)
8
+ raise InvalidNameError if name.match(/\//)
9
+ raise InvalidMd5Error unless md5.length == 32
10
+ @path = path
11
+ @name = name
12
+ @last_modified = last_modified
13
+ @md5 = md5
14
+ end
15
+
16
+ def full_path
17
+ @path + @name
18
+ end
19
+
20
+ def <=> (another)
21
+ full_path <=> another.full_path
22
+ end
23
+
24
+ class InvalidTimeError < Exception
25
+ end
26
+
27
+ class InvalidNameError < Exception
28
+ end
29
+
30
+ class InvalidPathError < Exception
31
+ end
32
+
33
+ class InvalidMd5Error < Exception
34
+ end
35
+ end
36
+ end
data/lib/filey-diff.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'require_relative' if RUBY_VERSION < "1.9"
3
+ require_relative 'filey-diff/data-sources/data_source'
4
+ require_relative 'filey-diff/data-sources/aws_sdk_s3'
5
+ require_relative 'filey-diff/data-sources/file_system'
6
+ require_relative 'filey-diff/filey'
7
+ require_relative 'filey-diff/comparison'
@@ -0,0 +1,77 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Filey::Comparison do
4
+ before {
5
+ @scifi = Filey::Filey.new('./', 'scifi.txt', Time.now,
6
+ '9cdfb439c7876e703e307864c9167a15')
7
+ @scifi_changed = Filey::Filey.new('./', 'scifi.txt', Time.now,
8
+ '9cdfb439c7876e703e307864c9167DDD')
9
+ @deep_space = Filey::Filey.new('./', 'abandoned.txt', Time.now,
10
+ '9cdfb439c7876e703e307864c9167a15')
11
+ @outdated_file_object = Filey::Filey.new('./', 'foo.txt', Time.now - 10,
12
+ '9cdfb439c7876e703e307864c9167a15')
13
+ @latest_file_object = Filey::Filey.new('./', 'foo.txt', Time.now,
14
+ '9cdfb439c7876e703e307864c9167a15')
15
+ }
16
+
17
+ context 'finding outdated files' do
18
+ before {
19
+ data_source_a = DataSource.new([ @scifi, @latest_file_object ])
20
+ data_source_b = DataSource.new([ @outdated_file_object, @deep_space ])
21
+ @outdated_file_objects = Filey::Comparison.list_outdated(data_source_a, data_source_b)
22
+ }
23
+
24
+ it 'lists the outdated files when comparing two data sources' do
25
+ @outdated_file_objects.length.should be(1)
26
+ end
27
+
28
+ it 'lists the outdated files when comparing two data sources' do
29
+ @outdated_file_objects.should include(@outdated_file_object)
30
+ end
31
+ end
32
+
33
+ context 'finding missing files' do
34
+ before {
35
+ data_source_a = DataSource.new([ @deep_space ])
36
+ data_source_b = DataSource.new([ @scifi ])
37
+ @missing_file_objects = Filey::Comparison.list_missing(data_source_a, data_source_b)
38
+ }
39
+
40
+ it 'lists the missing files when comparing two data sources' do
41
+ @missing_file_objects.should include(@deep_space)
42
+ end
43
+
44
+ it 'lists the missing files when comparing two data sources' do
45
+ @missing_file_objects.length.should be(1)
46
+ end
47
+ end
48
+
49
+ context 'finding changed files' do
50
+ before {
51
+ data_source_a = DataSource.new([ @scifi ])
52
+ data_source_b = DataSource.new([ @scifi_changed ])
53
+ @changed_files = Filey::Comparison.list_changed(data_source_a, data_source_b)
54
+ }
55
+
56
+ it 'compares filey md5 hashes' do
57
+ @changed_files.should include(@scifi_changed)
58
+ end
59
+
60
+ it 'compares filey md5 hashes' do
61
+ @changed_files.length.should be(1)
62
+ end
63
+
64
+ context 'same md5 hashes' do
65
+ before {
66
+ data_source_a = DataSource.new([ @scifi ])
67
+ data_source_b = DataSource.new([ @scifi ])
68
+ @changed_files = Filey::Comparison.list_changed(data_source_a, data_source_b)
69
+ }
70
+
71
+ it 'notices when the md5 hashes are same' do
72
+ @changed_files.length.should be(0)
73
+ end
74
+ end
75
+ end
76
+
77
+ end
@@ -0,0 +1,65 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ shared_examples "a data source" do |source|
4
+ let(:data_source) { described_class.new(source) }
5
+
6
+ it 'normalises the objects into Fileys' do
7
+ filey = data_source.get_fileys.sort[0]
8
+ filey.path.should eq('./cameron/80s/')
9
+ filey.name.should eq('aliens.txt')
10
+ end
11
+
12
+ it 'provides an md5 hash of the filey content' do
13
+ filey = data_source.get_fileys.sort[0]
14
+ filey.md5.should eq(Digest::MD5.hexdigest('Hudson'))
15
+ end
16
+
17
+ it 'normalises the objects into Fileys' do
18
+ filey = data_source.get_fileys.sort[1]
19
+ filey.path.should eq('./cameron/90s/')
20
+ filey.name.should eq('t2.txt')
21
+ end
22
+
23
+ it 'normalises the objects into Fileys' do
24
+ filey = data_source.get_fileys.sort[2]
25
+ filey.path.should eq('./')
26
+ filey.name.should eq('movies.txt')
27
+ end
28
+
29
+ it 'normalises the objects into Fileys' do
30
+ data_source.get_fileys.each { |file_object|
31
+ file_object.should be_an_instance_of(Filey::Filey)
32
+ }
33
+ end
34
+ end
35
+
36
+ objects = [
37
+ { :path => 'cameron/80s/aliens.txt', :mtime => Time.now,
38
+ :content => 'Hudson' },
39
+ { :path => 'cameron/90s/t2.txt', :mtime => Time.now,
40
+ :content => 't1000' },
41
+ { :path => 'movies.txt', :mtime => Time.now,
42
+ :content => 'foo' }
43
+ ]
44
+
45
+ describe Filey::DataSources::AwsSdkS3 do
46
+ s3_bucket = S3Bucket.new(
47
+ objects.map { |object|
48
+ S3Object.new(object[:path], object[:mtime], object[:content])
49
+ }
50
+ )
51
+ it_should_behave_like "a data source", s3_bucket
52
+ end
53
+
54
+ describe Filey::DataSources::FileSystem do
55
+ require 'tmpdir'
56
+ @directory = Dir.mktmpdir
57
+ objects.each { |object|
58
+ fs_path = "#{@directory}/#{object[:path]}"
59
+ FileUtils.mkdir_p(fs_path.scan(/(.*\/)/).first.first)
60
+ File.open(fs_path, 'w') do |file|
61
+ file.write object[:content]
62
+ end
63
+ }
64
+ it_should_behave_like "a data source", @directory
65
+ end
@@ -0,0 +1,81 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Filey::Filey do
4
+ context 'path validation' do
5
+ it 'requires path to start with a dot and end with a slash' do
6
+ expect {
7
+ Filey::Filey.new('', 'aliens.txt', Time.now,
8
+ '9cdfb439c7876e703e307864c9167a15')
9
+ }.to raise_error(Filey::Filey::InvalidPathError)
10
+ end
11
+
12
+ it 'requires path to start with a dot and end with a slash' do
13
+ expect {
14
+ Filey::Filey.new('.', 'aliens.txt', Time.now,
15
+ '9cdfb439c7876e703e307864c9167a15')
16
+ }.to raise_error(Filey::Filey::InvalidPathError)
17
+ end
18
+
19
+ it 'requires path to start with a dot and end with a slash' do
20
+ expect {
21
+ Filey::Filey.new('/', 'aliens.txt', Time.now,
22
+ '9cdfb439c7876e703e307864c9167a15')
23
+ }.to raise_error(Filey::Filey::InvalidPathError)
24
+ end
25
+
26
+ it 'requires path to start with a dot and end with a slash' do
27
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
28
+ '9cdfb439c7876e703e307864c9167a15')
29
+ end
30
+
31
+ it 'requires path to start with a dot and end with a slash' do
32
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
33
+ '9cdfb439c7876e703e307864c9167a15')
34
+ end
35
+ end
36
+
37
+ context 'time validation' do
38
+ it 'requires the last_modified property to be an instance of Time' do
39
+ expect {
40
+ Filey::Filey.new('./', 'aliens.txt', '',
41
+ '9cdfb439c7876e703e307864c9167a15')
42
+ }.to raise_error(Filey::Filey::InvalidTimeError)
43
+ end
44
+
45
+ it 'requires the last_modified property to be an instance of Time' do
46
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
47
+ '9cdfb439c7876e703e307864c9167a15')
48
+ end
49
+ end
50
+
51
+ context 'name validation' do
52
+ it 'requires the name not to contain slashes' do
53
+ expect {
54
+ Filey::Filey.new('./', '/aliens.txt', Time.now,
55
+ '9cdfb439c7876e703e307864c9167a15')
56
+ }.to raise_error(Filey::Filey::InvalidNameError)
57
+ end
58
+
59
+ it 'requires the name not to contain slashes' do
60
+ Filey::Filey.new('./', 'aliens.txt', Time.now,
61
+ '9cdfb439c7876e703e307864c9167a15')
62
+ end
63
+ end
64
+
65
+ context 'path with name' do
66
+ it 'can concatenate the path and name' do
67
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
68
+ '9cdfb439c7876e703e307864c9167a15').
69
+ full_path.should eq('./ripley/aliens.txt')
70
+ end
71
+ end
72
+
73
+ context 'md5 validation' do
74
+ it 'raises an error if the md5 hash is not valid' do
75
+ expect {
76
+ Filey::Filey.new('./ripley/', 'aliens.txt', Time.now,
77
+ 'i-am-not-a-valid-md5-hash')
78
+ }.to raise_error(Filey::Filey::InvalidMd5Error)
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,35 @@
1
+ require 'rspec'
2
+ require 'digest/md5'
3
+ require File.dirname(__FILE__) + '/../lib/filey-diff'
4
+
5
+ class S3Object
6
+ attr_reader :key, :last_modified
7
+
8
+ def initialize(key, last_modified, content)
9
+ @key = key
10
+ @last_modified = last_modified
11
+ @content = content
12
+ end
13
+
14
+ def etag
15
+ Digest::MD5.hexdigest(@content)
16
+ end
17
+ end
18
+
19
+ class S3Bucket
20
+ attr_reader :objects
21
+
22
+ def initialize(s3_objects)
23
+ @objects = s3_objects
24
+ end
25
+ end
26
+
27
+ class DataSource
28
+ def initialize(file_objects)
29
+ @file_objects = file_objects
30
+ end
31
+
32
+ def get_fileys
33
+ @file_objects
34
+ end
35
+ end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filey-diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Lauri Lehmijoki
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '0.9'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.9'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2.11'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2.11'
46
+ description: ! "\n Find missing or outdated files.\n For example, compare your
47
+ local file system to an AWS S3 bucket.\n "
48
+ email: lauri.lehmijoki@iki.fi
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .travis.yml
55
+ - Gemfile
56
+ - README.md
57
+ - Rakefile
58
+ - filey-diff.gemspec
59
+ - lib/filey-diff.rb
60
+ - lib/filey-diff/comparison.rb
61
+ - lib/filey-diff/data-sources/aws_sdk_s3.rb
62
+ - lib/filey-diff/data-sources/data_source.rb
63
+ - lib/filey-diff/data-sources/file_system.rb
64
+ - lib/filey-diff/filey.rb
65
+ - spec/comparison_spec.rb
66
+ - spec/data_sources_spec.rb
67
+ - spec/filey_spec.rb
68
+ - spec/spec_helper.rb
69
+ homepage: http://github.com/laurilehmijoki/filey-diff
70
+ licenses: []
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ segments:
82
+ - 0
83
+ hash: 4129900562195660113
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ segments:
91
+ - 0
92
+ hash: 4129900562195660113
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 1.8.24
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: Compare two data sources that contain file-like objects
99
+ test_files:
100
+ - spec/comparison_spec.rb
101
+ - spec/data_sources_spec.rb
102
+ - spec/filey_spec.rb
103
+ - spec/spec_helper.rb