filey-diff 1.4.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d859b05f01d263d190fe2458c2b2ffc4b58395ce
4
- data.tar.gz: 3fbda83260167397c8cd1bb7b71ff63b63103394
3
+ metadata.gz: cc23f9bf625a91c7d80733abc57047d431ff600a
4
+ data.tar.gz: 2f6f4ec21efb418cf0e5b9e704d34fe5d26908e5
5
5
  SHA512:
6
- metadata.gz: 78cccad67134289ccf0be21024ba3f37e90abd7bac9278e830bdd14c16974f633a21672923fb7834fe695504585095abd61ad5d01258c321c642ebba7a2ea000
7
- data.tar.gz: 7210db02b36df164b4684f2492f45f01fbdb62e37ea0e3ce55172b004dc43adcf6f3c21259c17af0faa2f80ab4e4faa6d60261c9066d2e3a11022354bf304007
6
+ metadata.gz: 7e37c4f64dda53464693dc2f5b670a1059a2b08867634f36556406083ba0518f556613365da7a9dbd09fa211d273b865ae84f171cba916cca5323da46a1781b7
7
+ data.tar.gz: c5109d899be4af7c5fa12566cce6a96c2232b2d3abcf9b2fe8aa6cc5e2f10409736ab2662166faaa3f933a886e67e4b201cd278878436927fae3596fefd6ba6c
@@ -2,6 +2,14 @@
2
2
 
3
3
  This project is [Semantically Versioned](http://semver.org).
4
4
 
5
+ ## 2.0.0
6
+
7
+ * Improve performance significantly (see https://github.com/laurilehmijoki/s3_website/issues/44)
8
+
9
+ #### Non-backwards compatible changes:
10
+ * Calculate MD5 of gzipped S3 object using its gzipped content
11
+ (previously MD5 was always calculated using raw, non-gzipped file content).
12
+
5
13
  ## 1.4.3
6
14
 
7
15
  * Fix issue <https://github.com/laurilehmijoki/filey-diff/pull/8>
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'filey-diff'
3
- s.version = '1.4.3'
3
+ s.version = '2.0.0'
4
4
 
5
5
  s.summary = "Compare two data sources that contain file-like objects"
6
6
  s.description =
@@ -2,33 +2,29 @@ module Filey
2
2
  class Comparison
3
3
  def self.list_outdated(data_source_a, data_source_b)
4
4
  select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
5
- b_item.full_path == a_item.full_path and
6
- b_item.last_modified < a_item.last_modified
5
+ !a_item.nil? && b_item.last_modified < a_item.last_modified
7
6
  }
8
7
  end
9
8
 
10
9
  def self.list_changed(data_source_a, data_source_b)
11
10
  select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
12
- b_item.full_path == a_item.full_path and
13
- b_item.md5 != a_item.md5
11
+ !a_item.nil? && b_item.md5 != a_item.md5
14
12
  }
15
13
  end
16
14
 
17
15
  def self.list_missing(data_source_a, data_source_b)
18
- intersection = select_in_outer_array(data_source_a, data_source_b) do
19
- |a_item, b_item|
20
- b_item.full_path == a_item.full_path
21
- end
16
+ intersection = select_in_outer_array(data_source_a, data_source_b) { |a_item, b_item|
17
+ !b_item.nil?
18
+ }
22
19
  data_source_a.get_fileys - intersection
23
20
  end
24
21
 
25
22
  private
26
23
 
27
24
  def self.select_in_outer_array(outer, inner)
25
+ inner_hash = inner.get_fileys.reduce({}) { |h, e| h[e.full_path] = e; h }
28
26
  outer.get_fileys.select { |outer_item|
29
- inner.get_fileys.select { |inner_item|
30
- yield outer_item, inner_item
31
- }.length > 0
27
+ yield outer_item, inner_hash[outer_item.full_path]
32
28
  }
33
29
  end
34
30
  end
@@ -54,13 +54,7 @@ module Filey
54
54
  name = s3_object.key
55
55
  end
56
56
 
57
- if (s3_object.head[:content_encoding] == "gzip")
58
- last_modified, md5 = last_modified_and_md5_from_gzipped(
59
- s3_object, path
60
- )
61
- else
62
- last_modified, md5 = last_modified_and_md5(s3_object)
63
- end
57
+ last_modified, md5 = last_modified_and_md5(s3_object)
64
58
 
65
59
  normalised_path = "./#{path}"
66
60
  filey = Filey.new(
@@ -78,35 +72,6 @@ module Filey
78
72
  md5 = s3_object.etag.gsub(/"/, '').split('-',2).first
79
73
  [last_modified, md5]
80
74
  end
81
-
82
- def last_modified_and_md5_from_gzipped(s3_object, path)
83
- s3_object_contents = s3_object.read
84
- if is_already_decoded s3_object_contents
85
- md5 = Digest::MD5.hexdigest(s3_object_contents)
86
- [s3_object.last_modified, md5]
87
- else
88
- tempfile = Tempfile.new(File.basename(path))
89
- tempfile.binmode
90
- tempfile.write s3_object_contents
91
- tempfile.close
92
-
93
- gz = Zlib::GzipReader.open(tempfile.path)
94
- last_modified = gz.mtime
95
- md5 = Digest::MD5.hexdigest(gz.read)
96
- gz.close
97
- [last_modified, md5]
98
- end
99
- end
100
-
101
- # Check if the two first bytes are the magic numbers of the gzip format.
102
- # We double-check here because Ruby 2.0.0 decodes gzip'ed HTTP responses
103
- # automatically. As a result, we get decoded gzip data from the
104
- # s3_object#read method when we are using Ruby 2.0.0, and encoded data
105
- # when we are using previous versions of Ruby.
106
- def is_already_decoded(gzipped_on_server)
107
- is_gzipped = gzipped_on_server.bytes.to_a[0] == 0x1f && gzipped_on_server.bytes.to_a[1] == 0x8b
108
- is_gzipped == false
109
- end
110
75
  end
111
76
  end
112
77
  end
@@ -160,35 +160,15 @@ describe Filey::DataSources::AwsSdkS3 do
160
160
  )]))
161
161
  }
162
162
 
163
- it 'provides the original md5/mtime of a gzipped file' do
163
+ it 'provides the md5/mtime of a gzipped file' do
164
164
  filey = data_source_with_one_gzipped_object.get_fileys[0]
165
- filey.md5.should eq(Digest::MD5.hexdigest(objects.first[:content]))
165
+ gzipped_content = IO.binread(gzip_tempfile_and_path[0])
166
+ filey.md5.should eq(Digest::MD5.hexdigest(gzipped_content))
166
167
  # GzipWriter seems to cut off fractions of a second,
167
168
  # to_i adjusts the original file to match
168
169
  filey.last_modified.to_i.should eq(objects.first[:mtime].to_i)
169
170
  end
170
171
 
171
- context 'working with Ruby 2.0.0 automatic decoding of gzipped HTTP responses' do
172
- let(:data_source_with_decoded_object_and_gzip_header) {
173
- data_source = Filey::DataSources::AwsSdkS3.new(S3Bucket.new([S3Object.new(
174
- objects.first[:path],
175
- objects.first[:mtime],
176
- objects.first[:content],
177
- { :content_encoding => 'gzip' }
178
- )]))
179
- }
180
-
181
- it 'detects the case where the gzipped data has already been decoded' do
182
- filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
183
- filey.last_modified.should eq(objects.first[:mtime])
184
- end
185
-
186
- it 'returns the md5 of the gzip-decoded content' do
187
- filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
188
- filey.md5.should eq(Digest::MD5.hexdigest(objects.first[:content]))
189
- end
190
- end
191
-
192
172
  def gzip_into_tmp_file(content, mtime)
193
173
  tempfile = Tempfile.new("temp")
194
174
 
metadata CHANGED
@@ -1,41 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filey-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lauri Lehmijoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-01 00:00:00.000000000 Z
11
+ date: 2014-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0.9'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.9'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: '2.11'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '2.11'
41
41
  description: "\n Find missing or outdated files.\n For example, compare your
@@ -45,8 +45,8 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
- - .gitignore
49
- - .travis.yml
48
+ - ".gitignore"
49
+ - ".travis.yml"
50
50
  - Gemfile
51
51
  - README.md
52
52
  - Rakefile
@@ -72,17 +72,17 @@ require_paths:
72
72
  - lib
73
73
  required_ruby_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
- - - '>='
75
+ - - ">="
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  required_rubygems_version: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  requirements: []
84
84
  rubyforge_project:
85
- rubygems_version: 2.0.3
85
+ rubygems_version: 2.2.2
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: Compare two data sources that contain file-like objects