filey-diff 1.4.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/changelog.md +8 -0
- data/filey-diff.gemspec +1 -1
- data/lib/filey-diff/comparison.rb +7 -11
- data/lib/filey-diff/data-sources/aws_sdk_s3.rb +1 -36
- data/spec/data_sources_spec.rb +3 -23
- metadata +11 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc23f9bf625a91c7d80733abc57047d431ff600a
|
4
|
+
data.tar.gz: 2f6f4ec21efb418cf0e5b9e704d34fe5d26908e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e37c4f64dda53464693dc2f5b670a1059a2b08867634f36556406083ba0518f556613365da7a9dbd09fa211d273b865ae84f171cba916cca5323da46a1781b7
|
7
|
+
data.tar.gz: c5109d899be4af7c5fa12566cce6a96c2232b2d3abcf9b2fe8aa6cc5e2f10409736ab2662166faaa3f933a886e67e4b201cd278878436927fae3596fefd6ba6c
|
data/changelog.md
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
|
3
3
|
This project is [Semantically Versioned](http://semver.org).
|
4
4
|
|
5
|
+
## 2.0.0
|
6
|
+
|
7
|
+
* Improve performance significantly (see https://github.com/laurilehmijoki/s3_website/issues/44)
|
8
|
+
|
9
|
+
#### Non-backwards compatible changes:
|
10
|
+
* Calculate MD5 of gzipped S3 object using its gzipped content
|
11
|
+
(previously MD5 was always calculated using raw, non-gzipped file content).
|
12
|
+
|
5
13
|
## 1.4.3
|
6
14
|
|
7
15
|
* Fix issue <https://github.com/laurilehmijoki/filey-diff/pull/8>
|
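data/filey-diff.gemspec
CHANGED
[hunk not captured in this extract; summary above lists +1 -1]
data/lib/filey-diff/comparison.rb
CHANGED
@@ -2,33 +2,29 @@ module Filey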
|
|
2
2
|
class Comparison
|
3
3
|
def self.list_outdated(data_source_a, data_source_b)
|
4
4
|
select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
|
5
|
-
b_item.
|
6
|
-
b_item.last_modified < a_item.last_modified
|
5
|
+
!a_item.nil? && b_item.last_modified < a_item.last_modified
|
7
6
|
}
|
8
7
|
end
|
9
8
|
|
10
9
|
def self.list_changed(data_source_a, data_source_b)
|
11
10
|
select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
|
12
|
-
b_item.
|
13
|
-
b_item.md5 != a_item.md5
|
11
|
+
!a_item.nil? && b_item.md5 != a_item.md5
|
14
12
|
}
|
15
13
|
end
|
16
14
|
|
17
15
|
def self.list_missing(data_source_a, data_source_b)
|
18
|
-
intersection = select_in_outer_array(data_source_a, data_source_b)
|
19
|
-
|
20
|
-
|
21
|
-
end
|
16
|
+
intersection = select_in_outer_array(data_source_a, data_source_b) { |a_item, b_item|
|
17
|
+
!b_item.nil?
|
18
|
+
}
|
22
19
|
data_source_a.get_fileys - intersection
|
23
20
|
end
|
24
21
|
|
25
22
|
private
|
26
23
|
|
27
24
|
def self.select_in_outer_array(outer, inner)
|
25
|
+
inner_hash = inner.get_fileys.reduce({}) { |h, e| h[e.full_path] = e; h }
|
28
26
|
outer.get_fileys.select { |outer_item|
|
29
|
-
|
30
|
-
yield outer_item, inner_item
|
31
|
-
}.length > 0
|
27
|
+
yield outer_item, inner_hash[outer_item.full_path]
|
32
28
|
}
|
33
29
|
end
|
34
30
|
end
|
data/lib/filey-diff/data-sources/aws_sdk_s3.rb
CHANGED
@@ -54,13 +54,7 @@ module Filey
|
|
54
54
|
name = s3_object.key
|
55
55
|
end
|
56
56
|
|
57
|
-
|
58
|
-
last_modified, md5 = last_modified_and_md5_from_gzipped(
|
59
|
-
s3_object, path
|
60
|
-
)
|
61
|
-
else
|
62
|
-
last_modified, md5 = last_modified_and_md5(s3_object)
|
63
|
-
end
|
57
|
+
last_modified, md5 = last_modified_and_md5(s3_object)
|
64
58
|
|
65
59
|
normalised_path = "./#{path}"
|
66
60
|
filey = Filey.new(
|
@@ -78,35 +72,6 @@ module Filey
|
|
78
72
|
md5 = s3_object.etag.gsub(/"/, '').split('-',2).first
|
79
73
|
[last_modified, md5]
|
80
74
|
end
|
81
|
-
|
82
|
-
def last_modified_and_md5_from_gzipped(s3_object, path)
|
83
|
-
s3_object_contents = s3_object.read
|
84
|
-
if is_already_decoded s3_object_contents
|
85
|
-
md5 = Digest::MD5.hexdigest(s3_object_contents)
|
86
|
-
[s3_object.last_modified, md5]
|
87
|
-
else
|
88
|
-
tempfile = Tempfile.new(File.basename(path))
|
89
|
-
tempfile.binmode
|
90
|
-
tempfile.write s3_object_contents
|
91
|
-
tempfile.close
|
92
|
-
|
93
|
-
gz = Zlib::GzipReader.open(tempfile.path)
|
94
|
-
last_modified = gz.mtime
|
95
|
-
md5 = Digest::MD5.hexdigest(gz.read)
|
96
|
-
gz.close
|
97
|
-
[last_modified, md5]
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# Check if the two first bytes are the magic numbers of the gzip format.
|
102
|
-
# We double-check here because Ruby 2.0.0 decodes gzip'ed HTTP responses
|
103
|
-
# automatically. As a result, we get decoded gzip data from the
|
104
|
-
# s3_object#read method when we are using Ruby 2.0.0, and encoded data
|
105
|
-
# when we are using previous versions of Ruby.
|
106
|
-
def is_already_decoded(gzipped_on_server)
|
107
|
-
is_gzipped = gzipped_on_server.bytes.to_a[0] == 0x1f && gzipped_on_server.bytes.to_a[1] == 0x8b
|
108
|
-
is_gzipped == false
|
109
|
-
end
|
110
75
|
end
|
111
76
|
end
|
112
77
|
end
|
data/spec/data_sources_spec.rb
CHANGED
@@ -160,35 +160,15 @@ describe Filey::DataSources::AwsSdkS3 do
|
|
160
160
|
)]))
|
161
161
|
}
|
162
162
|
|
163
|
-
it 'provides the
|
163
|
+
it 'provides the md5/mtime of a gzipped file' do
|
164
164
|
filey = data_source_with_one_gzipped_object.get_fileys[0]
|
165
|
-
|
165
|
+
gzipped_content = IO.binread(gzip_tempfile_and_path[0])
|
166
|
+
filey.md5.should eq(Digest::MD5.hexdigest(gzipped_content))
|
166
167
|
# GzipWriter seems to cut off fractions of a second,
|
167
168
|
# to_i adjusts the original file to match
|
168
169
|
filey.last_modified.to_i.should eq(objects.first[:mtime].to_i)
|
169
170
|
end
|
170
171
|
|
171
|
-
context 'working with Ruby 2.0.0 automatic decoding of gzipped HTTP responses' do
|
172
|
-
let(:data_source_with_decoded_object_and_gzip_header) {
|
173
|
-
data_source = Filey::DataSources::AwsSdkS3.new(S3Bucket.new([S3Object.new(
|
174
|
-
objects.first[:path],
|
175
|
-
objects.first[:mtime],
|
176
|
-
objects.first[:content],
|
177
|
-
{ :content_encoding => 'gzip' }
|
178
|
-
)]))
|
179
|
-
}
|
180
|
-
|
181
|
-
it 'detects the case where the gzipped data has already been decoded' do
|
182
|
-
filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
|
183
|
-
filey.last_modified.should eq(objects.first[:mtime])
|
184
|
-
end
|
185
|
-
|
186
|
-
it 'returns the md5 of the gzip-decoded content' do
|
187
|
-
filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
|
188
|
-
filey.md5.should eq(Digest::MD5.hexdigest(objects.first[:content]))
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
172
|
def gzip_into_tmp_file(content, mtime)
|
193
173
|
tempfile = Tempfile.new("temp")
|
194
174
|
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filey-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.3
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lauri Lehmijoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0.9'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.9'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '2.11'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '2.11'
|
41
41
|
description: "\n Find missing or outdated files.\n For example, compare your
|
@@ -45,8 +45,8 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .gitignore
|
49
|
-
- .travis.yml
|
48
|
+
- ".gitignore"
|
49
|
+
- ".travis.yml"
|
50
50
|
- Gemfile
|
51
51
|
- README.md
|
52
52
|
- Rakefile
|
@@ -72,17 +72,17 @@ require_paths:
|
|
72
72
|
- lib
|
73
73
|
required_ruby_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
|
-
- -
|
75
|
+
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project:
|
85
|
-
rubygems_version: 2.
|
85
|
+
rubygems_version: 2.2.2
|
86
86
|
signing_key:
|
87
87
|
specification_version: 4
|
88
88
|
summary: Compare two data sources that contain file-like objects
|