filey-diff 1.4.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d859b05f01d263d190fe2458c2b2ffc4b58395ce
4
- data.tar.gz: 3fbda83260167397c8cd1bb7b71ff63b63103394
3
+ metadata.gz: cc23f9bf625a91c7d80733abc57047d431ff600a
4
+ data.tar.gz: 2f6f4ec21efb418cf0e5b9e704d34fe5d26908e5
5
5
  SHA512:
6
- metadata.gz: 78cccad67134289ccf0be21024ba3f37e90abd7bac9278e830bdd14c16974f633a21672923fb7834fe695504585095abd61ad5d01258c321c642ebba7a2ea000
7
- data.tar.gz: 7210db02b36df164b4684f2492f45f01fbdb62e37ea0e3ce55172b004dc43adcf6f3c21259c17af0faa2f80ab4e4faa6d60261c9066d2e3a11022354bf304007
6
+ metadata.gz: 7e37c4f64dda53464693dc2f5b670a1059a2b08867634f36556406083ba0518f556613365da7a9dbd09fa211d273b865ae84f171cba916cca5323da46a1781b7
7
+ data.tar.gz: c5109d899be4af7c5fa12566cce6a96c2232b2d3abcf9b2fe8aa6cc5e2f10409736ab2662166faaa3f933a886e67e4b201cd278878436927fae3596fefd6ba6c
@@ -2,6 +2,14 @@
2
2
 
3
3
  This project is [Semantically Versioned](http://semver.org).
4
4
 
5
+ ## 2.0.0
6
+
7
+ * Improve performance significantly (see https://github.com/laurilehmijoki/s3_website/issues/44)
8
+
9
+ #### Non-backwards compatible changes:
10
+ * Calculate MD5 of gzipped S3 object using its gzipped content
11
+ (previously MD5 was always calculated using raw, non-gzipped file content).
12
+
5
13
  ## 1.4.3
6
14
 
7
15
  * Fix issue <https://github.com/laurilehmijoki/filey-diff/pull/8>
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'filey-diff'
3
- s.version = '1.4.3'
3
+ s.version = '2.0.0'
4
4
 
5
5
  s.summary = "Compare two data sources that contain file-like objects"
6
6
  s.description =
@@ -2,33 +2,29 @@ module Filey
2
2
  class Comparison
3
3
  def self.list_outdated(data_source_a, data_source_b)
4
4
  select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
5
- b_item.full_path == a_item.full_path and
6
- b_item.last_modified < a_item.last_modified
5
+ !a_item.nil? && b_item.last_modified < a_item.last_modified
7
6
  }
8
7
  end
9
8
 
10
9
  def self.list_changed(data_source_a, data_source_b)
11
10
  select_in_outer_array(data_source_b, data_source_a) { |b_item, a_item|
12
- b_item.full_path == a_item.full_path and
13
- b_item.md5 != a_item.md5
11
+ !a_item.nil? && b_item.md5 != a_item.md5
14
12
  }
15
13
  end
16
14
 
17
15
  def self.list_missing(data_source_a, data_source_b)
18
- intersection = select_in_outer_array(data_source_a, data_source_b) do
19
- |a_item, b_item|
20
- b_item.full_path == a_item.full_path
21
- end
16
+ intersection = select_in_outer_array(data_source_a, data_source_b) { |a_item, b_item|
17
+ !b_item.nil?
18
+ }
22
19
  data_source_a.get_fileys - intersection
23
20
  end
24
21
 
25
22
  private
26
23
 
27
24
  def self.select_in_outer_array(outer, inner)
25
+ inner_hash = inner.get_fileys.reduce({}) { |h, e| h[e.full_path] = e; h }
28
26
  outer.get_fileys.select { |outer_item|
29
- inner.get_fileys.select { |inner_item|
30
- yield outer_item, inner_item
31
- }.length > 0
27
+ yield outer_item, inner_hash[outer_item.full_path]
32
28
  }
33
29
  end
34
30
  end
@@ -54,13 +54,7 @@ module Filey
54
54
  name = s3_object.key
55
55
  end
56
56
 
57
- if (s3_object.head[:content_encoding] == "gzip")
58
- last_modified, md5 = last_modified_and_md5_from_gzipped(
59
- s3_object, path
60
- )
61
- else
62
- last_modified, md5 = last_modified_and_md5(s3_object)
63
- end
57
+ last_modified, md5 = last_modified_and_md5(s3_object)
64
58
 
65
59
  normalised_path = "./#{path}"
66
60
  filey = Filey.new(
@@ -78,35 +72,6 @@ module Filey
78
72
  md5 = s3_object.etag.gsub(/"/, '').split('-',2).first
79
73
  [last_modified, md5]
80
74
  end
81
-
82
- def last_modified_and_md5_from_gzipped(s3_object, path)
83
- s3_object_contents = s3_object.read
84
- if is_already_decoded s3_object_contents
85
- md5 = Digest::MD5.hexdigest(s3_object_contents)
86
- [s3_object.last_modified, md5]
87
- else
88
- tempfile = Tempfile.new(File.basename(path))
89
- tempfile.binmode
90
- tempfile.write s3_object_contents
91
- tempfile.close
92
-
93
- gz = Zlib::GzipReader.open(tempfile.path)
94
- last_modified = gz.mtime
95
- md5 = Digest::MD5.hexdigest(gz.read)
96
- gz.close
97
- [last_modified, md5]
98
- end
99
- end
100
-
101
- # Check if the two first bytes are the magic numbers of the gzip format.
102
- # We double-check here because Ruby 2.0.0 decodes gzip'ed HTTP responses
103
- # automatically. As a result, we get decoded gzip data from the
104
- # s3_object#read method when we are using Ruby 2.0.0, and encoded data
105
- # when we are using previous versions of Ruby.
106
- def is_already_decoded(gzipped_on_server)
107
- is_gzipped = gzipped_on_server.bytes.to_a[0] == 0x1f && gzipped_on_server.bytes.to_a[1] == 0x8b
108
- is_gzipped == false
109
- end
110
75
  end
111
76
  end
112
77
  end
@@ -160,35 +160,15 @@ describe Filey::DataSources::AwsSdkS3 do
160
160
  )]))
161
161
  }
162
162
 
163
- it 'provides the original md5/mtime of a gzipped file' do
163
+ it 'provides the md5/mtime of a gzipped file' do
164
164
  filey = data_source_with_one_gzipped_object.get_fileys[0]
165
- filey.md5.should eq(Digest::MD5.hexdigest(objects.first[:content]))
165
+ gzipped_content = IO.binread(gzip_tempfile_and_path[0])
166
+ filey.md5.should eq(Digest::MD5.hexdigest(gzipped_content))
166
167
  # GzipWriter seems to cut off fractions of a second,
167
168
  # to_i adjusts the original file to match
168
169
  filey.last_modified.to_i.should eq(objects.first[:mtime].to_i)
169
170
  end
170
171
 
171
- context 'working with Ruby 2.0.0 automatic decoding of gzipped HTTP responses' do
172
- let(:data_source_with_decoded_object_and_gzip_header) {
173
- data_source = Filey::DataSources::AwsSdkS3.new(S3Bucket.new([S3Object.new(
174
- objects.first[:path],
175
- objects.first[:mtime],
176
- objects.first[:content],
177
- { :content_encoding => 'gzip' }
178
- )]))
179
- }
180
-
181
- it 'detects the case where the gzipped data has already been decoded' do
182
- filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
183
- filey.last_modified.should eq(objects.first[:mtime])
184
- end
185
-
186
- it 'returns the md5 of the gzip-decoded content' do
187
- filey = data_source_with_decoded_object_and_gzip_header.get_fileys.first
188
- filey.md5.should eq(Digest::MD5.hexdigest(objects.first[:content]))
189
- end
190
- end
191
-
192
172
  def gzip_into_tmp_file(content, mtime)
193
173
  tempfile = Tempfile.new("temp")
194
174
 
metadata CHANGED
@@ -1,41 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filey-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lauri Lehmijoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-01 00:00:00.000000000 Z
11
+ date: 2014-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0.9'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.9'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: '2.11'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '2.11'
41
41
  description: "\n Find missing or outdated files.\n For example, compare your
@@ -45,8 +45,8 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
- - .gitignore
49
- - .travis.yml
48
+ - ".gitignore"
49
+ - ".travis.yml"
50
50
  - Gemfile
51
51
  - README.md
52
52
  - Rakefile
@@ -72,17 +72,17 @@ require_paths:
72
72
  - lib
73
73
  required_ruby_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
- - - '>='
75
+ - - ">="
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  required_rubygems_version: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  requirements: []
84
84
  rubyforge_project:
85
- rubygems_version: 2.0.3
85
+ rubygems_version: 2.2.2
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: Compare two data sources that contain file-like objects