httpimagestore 1.3.0 → 1.4.0

data/Gemfile CHANGED
@@ -1,10 +1,8 @@
  source "http://rubygems.org"
  ruby "1.9.3"
 
- #gem "unicorn-cuba-base", "~> 1.0"
- gem "unicorn-cuba-base", path: '../unicorn-cuba-base'
- #gem "httpthumbnailer-client", "~> 1.0"
- gem "httpthumbnailer-client", path: '../httpthumbnailer-client'
+ gem "unicorn-cuba-base", "~> 1.1"
+ gem "httpthumbnailer-client", "~> 1.1"
 
  gem "aws-sdk", "~> 1.10"
  gem "mime-types", "~> 1.17"
  gem "sdl4r", "~> 0.9"
data/Gemfile.lock CHANGED
@@ -1,63 +1,79 @@
  PATH
    remote: ../httpthumbnailer
    specs:
-     httpthumbnailer (1.0.0)
+     httpthumbnailer (1.1.0)
        rmagick (~> 2)
-       unicorn-cuba-base (~> 1.0)
-
- PATH
-   remote: ../httpthumbnailer-client
-   specs:
-     httpthumbnailer-client (1.0.0)
-       cli (~> 1.1.0)
-       httpclient (>= 2.3)
-       multipart-parser (~> 0.1.1)
-
- PATH
-   remote: ../unicorn-cuba-base
-   specs:
-     unicorn-cuba-base (1.0.0)
-       cli (~> 1.1.0)
-       cuba (~> 3.0)
-       facter (~> 1.6.11)
-       raindrops (~> 0.11)
-       ruby-ip (~> 0.9)
-       unicorn (>= 4.6.2)
+       unicorn-cuba-base (~> 1.1)
 
  GEM
    remote: http://rubygems.org/
    specs:
-     aws-sdk (1.10.0)
+     addressable (2.3.5)
+     aws-sdk (1.18.0)
        json (~> 1.4)
-       nokogiri (>= 1.4.4)
+       nokogiri (< 1.6.0)
        uuidtools (~> 2.1)
-     builder (3.0.0)
+     builder (3.2.2)
      cli (1.1.1)
      cuba (3.1.0)
        rack
-     cucumber (1.2.1)
+     cucumber (1.3.8)
        builder (>= 2.1.2)
        diff-lcs (>= 1.1.3)
-       gherkin (~> 2.11.0)
-       json (>= 1.4.6)
+       gherkin (~> 2.12.1)
+       multi_json (>= 1.7.5, < 2.0)
+       multi_test (>= 0.0.2)
      daemon (1.1.0)
-     diff-lcs (1.1.3)
+     diff-lcs (1.2.4)
      facter (1.6.18)
-     gherkin (2.11.2)
-       json (>= 1.4.6)
-     git (1.2.5)
-     httpclient (2.3.3)
-     jeweler (1.8.4)
+     faraday (0.8.8)
+       multipart-post (~> 1.2.0)
+     gherkin (2.12.1)
+       multi_json (~> 1.3)
+     git (1.2.6)
+     github_api (0.10.1)
+       addressable
+       faraday (~> 0.8.1)
+       hashie (>= 1.2)
+       multi_json (~> 1.4)
+       nokogiri (~> 1.5.2)
+       oauth2
+     hashie (2.0.5)
+     highline (1.6.19)
+     httpauth (0.2.0)
+     httpclient (2.3.4.1)
+     httpthumbnailer-client (1.1.0)
+       cli (~> 1.1.0)
+       httpclient (>= 2.3)
+       multipart-parser (~> 0.1.1)
+     jeweler (1.8.7)
+       builder
        bundler (~> 1.0)
        git (>= 1.2.5)
+       github_api (= 0.10.1)
+       highline (>= 1.6.15)
+       nokogiri (= 1.5.10)
        rake
        rdoc
-     json (1.7.5)
-     kgio (2.8.0)
-     mime-types (1.17.2)
+     json (1.8.0)
+     jwt (0.1.8)
+       multi_json (>= 1.5)
+     kgio (2.8.1)
+     mime-types (1.25)
      msgpack (0.5.5)
+     multi_json (1.8.0)
+     multi_test (0.0.2)
+     multi_xml (0.5.5)
      multipart-parser (0.1.1)
-     nokogiri (1.5.9)
+     multipart-post (1.2.0)
+     nokogiri (1.5.10)
+     oauth2 (0.9.2)
+       faraday (~> 0.8)
+       httpauth (~> 0.2)
+       jwt (~> 0.1.4)
+       multi_json (~> 1.0)
+       multi_xml (~> 0.5)
+       rack (~> 1.2)
      prawn (0.8.4)
        prawn-core (>= 0.8.4, < 0.9)
        prawn-layout (>= 0.8.4, < 0.9)
@@ -66,25 +82,32 @@ GEM
      prawn-layout (0.8.4)
      prawn-security (0.8.4)
      rack (1.5.2)
-     raindrops (0.11.0)
-     rake (10.0.4)
-     rdoc (3.12)
+     raindrops (0.12.0)
+     rake (10.1.0)
+     rdoc (3.12.2)
        json (~> 1.4)
      rmagick (2.13.2)
-     rspec (2.13.0)
-       rspec-core (~> 2.13.0)
-       rspec-expectations (~> 2.13.0)
-       rspec-mocks (~> 2.13.0)
-     rspec-core (2.13.1)
-     rspec-expectations (2.13.0)
+     rspec (2.14.1)
+       rspec-core (~> 2.14.0)
+       rspec-expectations (~> 2.14.0)
+       rspec-mocks (~> 2.14.0)
+     rspec-core (2.14.5)
+     rspec-expectations (2.14.2)
        diff-lcs (>= 1.1.3, < 2.0)
-     rspec-mocks (2.13.1)
+     rspec-mocks (2.14.3)
      ruby-ip (0.9.1)
      sdl4r (0.9.11)
      unicorn (4.6.3)
        kgio (~> 2.6)
        rack
        raindrops (~> 0.7)
+     unicorn-cuba-base (1.1.0)
+       cli (~> 1.1.0)
+       cuba (~> 3.0)
+       facter (~> 1.6.11)
+       raindrops (~> 0.11)
+       ruby-ip (~> 0.9)
+       unicorn (>= 4.6.2)
      uuidtools (2.1.4)
 
  PLATFORMS
@@ -96,7 +119,7 @@ DEPENDENCIES
    daemon (~> 1)
    httpclient (>= 2.3)
    httpthumbnailer!
-   httpthumbnailer-client!
+   httpthumbnailer-client (~> 1.1)
    jeweler (~> 1.8.4)
    mime-types (~> 1.17)
    msgpack (~> 0.5)
@@ -104,4 +127,4 @@ DEPENDENCIES
    rdoc (~> 3.9)
    rspec (~> 2.13)
    sdl4r (~> 0.9)
-   unicorn-cuba-base!
+   unicorn-cuba-base (~> 1.1)
data/README.md CHANGED
@@ -11,11 +11,15 @@ It is using [HTTP Thumbnailer](https://github.com/jpastuszek/httpthumbnailer) as
  * sourcing and storage of images on [Amazon S3](http://aws.amazon.com/s3/)
  * image output with Cache-Control header
  * S3 public or private and http:// or https:// URL list output for stored images
+ * S3 read-through and write-through object cache
  * storage under custom paths including image hash, content determined extension or used URL path
  * based on [Unicorn HTTP server](http://unicorn.bogomips.org) with UNIX socket communication support
 
  ## Changelog
 
+ ### 1.4.0
+ * read-through and write-through S3 object cache support
+
  ### 1.3.0
 
  * `identify` statement support (requires [HTTP Thumbnailer](https://github.com/jpastuszek/httpthumbnailer) v1.1.0 or higher)
@@ -233,7 +237,8 @@ Options:
 
  * `bucket` - name of bucket to source image from
  * `path` - name of predefined path that will be used to generate key to object to source
- * `prefix` - prefix object key with given prefix value; this does not affect fromat of output URL; prefix will not be included in source path output; default: ``
+ * `prefix` - prefix object key with given prefix value; this does not affect format of output URL; prefix will not be included in source path output; default: ``
+ * `cache-root` - path to directory where S3 objects and meta-data will be cached when sourced from S3; read-through mode; if required information was found in cache object no S3 requests will be made; same directory can be used with different buckets since cache key consists of S3 bucket name and object key
 
  Example:
 
@@ -361,6 +366,7 @@ Options:
  * `path` - name of predefined path that will be used to generate key to store object under
  * `public` - if set to `true` the image will be readable by everybody; this affects fromat of output URL; default: `false`
  * `prefix` - prefix storeage key with given prefix value; this does not affect fromat of output URL; prefix will not be included in storage path output; default: ``
+ * `cache-root` - path to directory where stored S3 objects and meta-data will be cached for future sourcing with `source_s3`; write-through mode; note that same directory needs to be configured with corresponding `source_s3` statement
 
  Example:
 
@@ -526,11 +532,12 @@ This option is useful when building API that works on predefined set of image op
 
  ### Flexible API example
 
- Features two storage apporaches: with JPEG conversion and limiting in size - for user provided content - and storing literaly.
+ Features two storage approaches: with JPEG conversion and limiting in size - for user provided content - and storing literally.
  POST requests will end up with server side generated storage key based on input data digest.
- PUT requsts can be used to store image under provided storage key.
- Thumbnail GET API is similart to described in [Facebook APIs](https://developers.facebook.com/docs/reference/api/using-pictures/#sizes) for thumbnailing.
+ PUT requests can be used to store image under provided storage key.
+ Thumbnail GET API is similar to described in [Facebook APIs](https://developers.facebook.com/docs/reference/api/using-pictures/#sizes) for thumbnailing.
  Stored object extension and content type is determined from image data.
+ S3 objects are cached on storage and on read if not cached already (read-through/write-through cache).
 
  ```sdl
  s3 key="AIAITCKMELYWQZPJP7HQ" secret="V37lCu0F48Tv9s7QVqIT/sLf/wwqhNSB4B0Em7Ei" ssl=false
@@ -541,77 +548,77 @@ path "path" "#{path}"
  ## User uploaded content - always JPEG converted, not bigger than 2160x2160 and in hight quality compression
  post "pictures" {
    thumbnail "input" "original" operation="limit" width=2160 height=2160 format="jpeg" quality=95
-   store_s3 "original" bucket="mybucket" path="hash"
+   store_s3 "original" bucket="mybucket" path="hash" cache-root="/var/cache/httpimagestore"
    output_store_path "original"
  }
 
  put "pictures" {
    thumbnail "input" "original" operation="limit" width=2160 height=2160 format="jpeg" quality=95
-   store_s3 "original" bucket="mybucket" path="path"
+   store_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    output_store_path "original"
  }
 
  ## Uploaded by admin for use on the website for example - store whatever was send
  post "images" {
    identify "input"
-   store_s3 "input" bucket="mybucket" path="hash"
+   store_s3 "input" bucket="mybucket" path="hash" cache-root="/var/cache/httpimagestore"
    output_store_path "input"
  }
 
  put "images" {
    identify "input"
-   store_s3 "input" bucket="mybucket" path="path"
+   store_s3 "input" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    output_store_path "input"
  }
 
  ## Thumbailing - keep input format; default JPEG quality is 85
  ### Thumbnail specification from query string paramaters
  get "pictures" "&:width" "&:height" "&:operation?crop" "&:background-color?white" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="#{operation}" width="#{width}" height="#{height}" options="background-color:#{background-color}"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  get "pictures" "&:width" "&:height?1080" "&:operation?fit" "&:background-color?white" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="#{operation}" width="#{width}" height="#{height}" options="background-color:#{background-color}"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  get "pictures" "&:height" "&:width?1080" "&:operation?fit" "&:background-color?white" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="#{operation}" width="#{width}" height="#{height}" options="background-color:#{background-color}"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  ### Predefined thumbnailing specification
  get "pictures" "&type=square" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="crop" width="50" height="50"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  get "pictures" "&type=small" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="fit" width="50" height="2000"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  get "pictures" "&type=normall" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="fit" width="100" height="2000"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  get "pictures" "&type=large" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    thumbnail "original" "thumbnail" operation="fit" width="200" height="2000"
    output_image "thumbnail" cache-control="public, max-age=31557600, s-maxage=0"
  }
 
  ## By default serve original image as is - JPEG for user content and what was send for admin uploaded images
  get "pictures" {
-   source_s3 "original" bucket="mybucket" path="path"
+   source_s3 "original" bucket="mybucket" path="path" cache-root="/var/cache/httpimagestore"
    output_image "original" cache-control="public, max-age=31557600, s-maxage=0"
  }
  ```
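
Editor's note: a rough client-side sketch of driving the flexible API above, using the httpclient gem the project already depends on. This is not part of the gem; the `localhost:3000` address, the POST response carrying the storage path as plain text, and the exact thumbnail URL shape are assumptions based on the README's description.

```ruby
require 'httpclient'

# Hypothetical client usage for the flexible API config above.
client = HTTPClient.new

# Upload user content; the server JPEG-converts/limits it and stores it under a digest-based key.
# Assumption: the response body carries the generated storage path.
storage_path = client.post('http://localhost:3000/pictures', File.read('photo.jpg', mode: 'rb')).body.strip

# Fetch a 100x100 crop of the stored image via the query string thumbnailing API.
thumbnail = client.get("http://localhost:3000/pictures/#{storage_path}?width=100&height=100&operation=crop")
File.open('thumbnail.jpg', 'wb') { |io| io.write thumbnail.body }
```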
@@ -719,7 +726,7 @@ Compatibility API works by storing input image and selected (via URI) classes of
 
  With thumbnail on demand API user uploads original image. It is converted to JPEG and if it is too large also scaled down. Than that processed version is stored in S3 under key composed from hash of input image data and final image extension. Client will receive storage key for further reference in the response body. To obtain thumbnail **GET** request with obtained key and thumbnail parameters encoded in the URI needs to be send to the sever. It will read parameters from the URI and source selected image from S3. That image is then thumbnailed in the backend and sent back to client with custom Cache-Control header.
 
- Note that Compatibility API will also store "migarion" image in bucket used by on demand API. This allows for migration from that API to on demand API.
+ Note that Compatibility API will also store "migration" image in bucket used by on demand API. This allows for migration from that API to on demand API.
 
  Compatibility API example:
 
data/VERSION CHANGED
@@ -1 +1 @@
- 1.2.0
+ 1.4.0
@@ -16,7 +16,7 @@ Feature: Store limited original image in S3 and thumbnail based on request
  put "original" {
    thumbnail "input" "original" operation="limit" width=100 height=100 format="jpeg" quality=95
 
-   store_s3 "original" bucket="@AWS_S3_TEST_BUCKET@" path="original-hash"
+   store_s3 "original" bucket="@AWS_S3_TEST_BUCKET@" path="original-hash" cache-root="/tmp"
 
    output_store_path "original"
  }
@@ -30,7 +30,7 @@ Feature: Store limited original image in S3 and thumbnail based on request
  }
 
  get "thumbnail" "v2" ":operation" ":width" ":height" {
-   source_s3 "original" bucket="@AWS_S3_TEST_BUCKET@" path="path"
+   source_s3 "original" bucket="@AWS_S3_TEST_BUCKET@" path="path" cache-root="/tmp"
 
    thumbnail "original" "thumbnail" operation="#{operation}" width="#{width}" height="#{height}" options="#{query_string_options}" quality=84 format="png"
 
@@ -5,11 +5,11 @@
 
  Gem::Specification.new do |s|
    s.name = "httpimagestore"
-   s.version = "1.3.0"
+   s.version = "1.4.0"
 
    s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
    s.authors = ["Jakub Pastuszek"]
-   s.date = "2013-09-11"
+   s.date = "2013-10-04"
    s.description = "Thumbnails images using httpthumbnailer and stored data on HTTP server (S3)"
    s.email = "jpastuszek@gmail.com"
    s.executables = ["httpimagestore"]
@@ -86,6 +86,7 @@ Gem::Specification.new do |s|
      s.add_runtime_dependency(%q<aws-sdk>, ["~> 1.10"])
      s.add_runtime_dependency(%q<mime-types>, ["~> 1.17"])
      s.add_runtime_dependency(%q<sdl4r>, ["~> 0.9"])
+     s.add_runtime_dependency(%q<msgpack>, ["~> 0.5"])
      s.add_development_dependency(%q<httpclient>, [">= 2.3"])
      s.add_development_dependency(%q<rspec>, ["~> 2.13"])
      s.add_development_dependency(%q<cucumber>, [">= 0"])
@@ -100,6 +101,7 @@ Gem::Specification.new do |s|
      s.add_dependency(%q<aws-sdk>, ["~> 1.10"])
      s.add_dependency(%q<mime-types>, ["~> 1.17"])
      s.add_dependency(%q<sdl4r>, ["~> 0.9"])
+     s.add_dependency(%q<msgpack>, ["~> 0.5"])
      s.add_dependency(%q<httpclient>, [">= 2.3"])
      s.add_dependency(%q<rspec>, ["~> 2.13"])
      s.add_dependency(%q<cucumber>, [">= 0"])
@@ -115,6 +117,7 @@ Gem::Specification.new do |s|
      s.add_dependency(%q<aws-sdk>, ["~> 1.10"])
      s.add_dependency(%q<mime-types>, ["~> 1.17"])
      s.add_dependency(%q<sdl4r>, ["~> 0.9"])
+     s.add_dependency(%q<msgpack>, ["~> 0.5"])
      s.add_dependency(%q<httpclient>, [">= 2.3"])
      s.add_dependency(%q<rspec>, ["~> 2.13"])
      s.add_dependency(%q<cucumber>, [">= 0"])
@@ -72,7 +72,7 @@ module Configuration
 
    log.info "sourcing '#{image_name}' from file '#{storage_path}'"
    begin
-     data = storage_path.open('r') do |io|
+     data = storage_path.open('rb') do |io|
        request_state.memory_limit.io io
        io.read
      end
@@ -108,7 +108,7 @@ module Configuration
    image.store_url = "file://#{rendered_path.to_s}"
 
    log.info "storing '#{image_name}' in file '#{storage_path}' (#{image.data.length} bytes)"
-   storage_path.open('w'){|io| io.write image.data}
+   storage_path.open('wb'){|io| io.write image.data}
    FileSourceStoreBase.stats.incr_total_file_store
    FileSourceStoreBase.stats.incr_total_file_store_bytes(image.data.bytesize)
  end
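
Editor's note: the `'r'`/`'w'` → `'rb'`/`'wb'` switch above matters for image data. In text mode Ruby tags what is read with the default external encoding (and performs newline translation on Windows), which can mangle binary payloads; binary mode yields raw bytes. A minimal illustration (`photo.jpg` is a stand-in file name):

```ruby
# Text mode: string is tagged with the default external encoding (e.g. UTF-8),
# so byte-oriented operations on image data can behave unexpectedly.
text = File.open('photo.jpg', 'r')  { |io| io.read }
# Binary mode: raw ASCII-8BIT bytes, no encoding or newline translation.
bin  = File.open('photo.jpg', 'rb') { |io| io.read }

text.encoding # => #<Encoding:UTF-8> (default external)
bin.encoding  # => #<Encoding:ASCII-8BIT>
```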
@@ -83,6 +83,64 @@ module Configuration
    end
  end
 
+ class CacheFile < Pathname
+   def initialize(path)
+     super
+     @header = nil
+   end
+
+   def header
+     begin
+       read(0)
+     rescue
+       @header = {}
+     end unless @header
+     @header or fail 'no header data'
+   end
+
+   def read(max_bytes = nil)
+     open('rb') do |io|
+       io.flock(File::LOCK_SH)
+       @header = read_header(io)
+       return io.read(max_bytes)
+     end
+   end
+
+   def write(data)
+     dirname.directory? or dirname.mkpath
+     open('ab') do |io|
+       # opened but not truncated before lock can be obtained
+       io.flock(File::LOCK_EX)
+
+       # now get rid of the old content if any
+       io.seek 0, IO::SEEK_SET
+       io.truncate 0
+
+       begin
+         header = MessagePack.pack(@header)
+         io.write [header.length].pack('L') # header length
+         io.write header
+         io.write data
+       rescue => error
+         unlink # remove broken cache file
+         raise
+       end
+     end
+   end
+
+   private
+
+   def read_header_length(io)
+     head_length = io.read(4)
+     fail 'no header length' unless head_length and head_length.length == 4
+     head_length.unpack('L').first
+   end
+
+   def read_header(io)
+     MessagePack.unpack(io.read(read_header_length(io)))
+   end
+ end
+
  def initialize(root_dir)
    @root = Pathname.new(root_dir)
    @root.directory? or raise CacheRootNotDirError.new(root_dir)
@@ -91,23 +149,7 @@ module Configuration
  end
 
  def cache_file(bucket, key)
-   File.join(Digest::SHA2.new.update("#{bucket}/#{key}").to_s[0,32].match(/(..)(..)(.*)/).captures)
- end
-
- def open(bucket, key)
-   # TODO: locking
-   file = @root + cache_file(bucket, key)
-
-   file.dirname.directory? or file.dirname.mkpath
-   if file.exist?
-     file.open('r+') do |io|
-       yield io
-     end
-   else
-     file.open('w+') do |io|
-       yield io
-     end
-   end
+   CacheFile.new(File.join(@root.to_s, *Digest::SHA2.new.update("#{bucket}/#{key}").to_s[0,32].match(/(..)(..)(.*)/).captures))
  end
  end
 
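Editor's note: the on-disk layout `CacheFile#write` produces is a 4-byte native-endian header length (`Array#pack('L')`), followed by the MessagePack-encoded header hash, followed by the raw object data. A minimal standalone reader mirroring the `read`/`read_header` logic above (a sketch, not shipped with the gem):

```ruby
require 'msgpack'

# Reads a cache file in the CacheFile format:
# [4-byte header length ('L' pack)][msgpack header hash][raw S3 object data]
def read_cache_file(path)
  File.open(path, 'rb') do |io|
    io.flock(File::LOCK_SH)                       # same shared lock as CacheFile#read
    header_length = io.read(4).unpack('L').first  # 4-byte native-endian length
    header = MessagePack.unpack(io.read(header_length))
    [header, io.read]                             # header hash and object data
  end
end

header, data = read_cache_file('/tmp/0d/bf/50c256d6b6efe55d11d0b6b50600')
header['content_type'] # => e.g. "image/jpeg"
```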
@@ -147,91 +189,74 @@ module Configuration
  end
 
  class CacheObject < S3Object
+   extend Stats
+   def_stats(
+     :total_s3_cache_hits,
+     :total_s3_cache_misses,
+     :total_s3_cache_errors,
+   )
+
    include ClassLogging
 
-   def initialize(io, client, bucket, key)
-     @io = io
+   def initialize(cache_file, client, bucket, key)
      super(client, bucket, key)
 
-     @header = {}
-     @have_cache = false
+     @cache_file = cache_file
      @dirty = false
 
-     begin
-       head_length = @io.read(4)
-
-       if head_length and head_length.length == 4
-         head_length = head_length.unpack('L').first
-         @header = MessagePack.unpack(@io.read(head_length))
-         @have_cache = true
-
-         log.debug{"S3 object cache hit; bucket: '#{@bucket}' key: '#{@key}' [#{@io.path}]: header: #{@header}"}
-       else
-         log.debug{"S3 object cache miss; bucket: '#{@bucket}' key: '#{@key}' [#{@io.path}]"}
-       end
-     rescue => error
-       log.warn "cannot use cached S3 object; bucket: '#{@bucket}' key: '#{@key}' [#{@io.path}]: #{error}"
-       # not usable
-       io.seek 0
-       io.truncate 0
-     end
-
      yield self
 
-     # save object as was used if no error happened and there were changes
+     # save object if new data was read/written to/from S3 and no error happened
      write_cache if dirty?
    end
 
    def read(max_bytes = nil)
-     if @have_cache
-       data_location = @io.seek(0, IO::SEEK_CUR)
-       begin
-         return @data = @io.read(max_bytes)
-       ensure
-         @io.seek(data_location, IO::SEEK_SET)
-       end
-     else
-       dirty! :read
-       return @data = super
+     begin
+       @data = @cache_file.read(max_bytes)
+       CacheObject.stats.incr_total_s3_cache_hits
+       log.debug{"S3 object cache hit for bucket: '#{@bucket}' key: '#{@key}' [#{@cache_file}]: header: #{@cache_file.header}"}
+       return @data
+     rescue Errno::ENOENT
+       CacheObject.stats.incr_total_s3_cache_misses
+       log.debug{"S3 object cache miss for bucket: '#{@bucket}' key: '#{@key}' [#{@cache_file}]"}
+     rescue => error
+       CacheObject.stats.incr_total_s3_cache_errors
+       log.warn "cannot use cached S3 object for bucket: '#{@bucket}' key: '#{@key}' [#{@cache_file}]", error
      end
+     @data = super
+     dirty! :read
+     return @data
    end
 
    def write(data, options = {})
-     out = super
+     super
      @data = data
+     @cache_file.header['content_type'] = options[:content_type] if options[:content_type]
      dirty! :write
-     out
    end
 
    def private_url
-     @header['private_url'] ||= (dirty! :private_url; super)
+     @cache_file.header['private_url'] ||= (dirty! :private_url; super)
    end
 
    def public_url
-     @header['public_url'] ||= (dirty! :public_url; super)
+     @cache_file.header['public_url'] ||= (dirty! :public_url; super)
    end
 
    def content_type
-     @header['content_type'] ||= (dirty! :content_type; super)
+     @cache_file.header['content_type'] ||= (dirty! :content_type; super)
    end
 
    private
 
    def write_cache
      begin
-       log.debug{"S3 object is dirty, wirting cache file; bucket: '#{@bucket}' key: '#{@key}' [#{@io.path}]; header: #{@header}"}
+       log.debug{"S3 object is dirty, wirting cache file for bucket: '#{@bucket}' key: '#{@key}' [#{@cache_file}]; header: #{@cache_file.header}"}
 
        raise 'nil data!' unless @data
-       # rewrite
-       @io.seek(0, IO::SEEK_SET)
-       @io.truncate 0
-
-       header = MessagePack.pack(@header)
-       @io.write [header.length].pack('L') # header length
-       @io.write header
-       @io.write @data
+       @cache_file.write(@data)
      rescue => error
-       log.warn "cannot store S3 object in cache: bucket: '#{@bucket}' key: '#{@key}' [#{@io.path}]: #{error}"
+       log.warn "cannot store S3 object in cache for bucket: '#{@bucket}' key: '#{@key}' [#{@cache_file}]", error
      ensure
        @dirty = false
      end
@@ -296,7 +321,7 @@ module Configuration
      log.info "S3 object cache not configured (no cache-root) for image '#{image_name}'"
    end
  rescue CacheRoot::CacheRootNotDirError => error
-   log.warn "not using S3 object cache for image '#{image_name}': #{error}"
+   log.warn "not using S3 object cache for image '#{image_name}'", error
  end
 
  local :bucket, @bucket
@@ -321,18 +346,16 @@ module Configuration
 
  if @cache_root
    begin
-     @cache_root.open(@bucket, key) do |cahce_file_io|
-       CacheObject.new(cahce_file_io, client, @bucket, key) do |obj|
-         image = yield obj
-       end
+     cache_file = @cache_root.cache_file(@bucket, key)
+     CacheObject.new(cache_file, client, @bucket, key) do |obj|
+       image = yield obj
      end
-   rescue IOError => error
-     log.warn "cannot use S3 object cache '#{@cache_root.cache_file(@bucket, key)}': #{error}"
-     image = yield obj
+     return image
+   rescue Errno::EACCES, IOError => error
+     log.warn "cannot use S3 object cache for bucket: '#{@bucket}' key: '#{key}' [#{cache_file}]", error
    end
- else
-   image = yield S3Object.new(client, @bucket, key)
  end
+ return yield S3Object.new(client, @bucket, key)
  rescue AWS::S3::Errors::AccessDenied
    raise S3AccessDenied.new(@bucket, path)
  rescue AWS::S3::Errors::NoSuchBucket
@@ -340,7 +363,6 @@ module Configuration
  rescue AWS::S3::Errors::NoSuchKey
    raise S3NoSuchKeyError.new(@bucket, path)
  end
- image
  end
 
  S3SourceStoreBase.logger = Handler.logger_for(S3SourceStoreBase)
@@ -396,7 +418,7 @@ module Configuration
 
  options = {}
  options[:single_request] = true
- options[:content_type] = image.mime_type
+ options[:content_type] = image.mime_type if image.mime_type
  options[:acl] = acl
  options[:cache_control] = @cache_control if @cache_control
 
@@ -411,5 +433,6 @@ module Configuration
    end
    Handler::register_node_parser S3Store
    StatsReporter << S3SourceStoreBase.stats
+   StatsReporter << S3SourceStoreBase::CacheObject.stats
  end
 
@@ -72,9 +72,9 @@ else
  end
 
  before do
-   @cached_object = Pathname.new('/tmp/0d/bf/50c256d6b6efe55d11d0b6b50600')
-   @cached_object.dirname.mkpath
-   @cached_object.open('w') do |io|
+   @cache_file = Pathname.new('/tmp/0d/bf/50c256d6b6efe55d11d0b6b50600')
+   @cache_file.dirname.mkpath
+   @cache_file.open('w') do |io|
      io.write 'abc'
    end
@@ -83,19 +83,9 @@ else
  end
 
  it 'should build cache file location for storage location from bucket and key' do
-   subject.cache_file('mybucket', 'hello/world.jpg').should == "0d/bf/50c256d6b6efe55d11d0b6b50600"
- end
-
- it 'should look up object stored on disk by bucket and key' do
-   subject.open('mybucket', 'hello/world.jpg') do |io|
-     io.read.should == 'abc'
-   end
- end
-
- it 'should create cache object for bucket and key if it does not exist' do
-   subject.open('mybucket', 'hello/world2.jpg') do |io|
-     io.read.should == ''
-   end
+   cache_file = subject.cache_file('mybucket', 'hello/world.jpg')
+   cache_file.should be_a Configuration::S3SourceStoreBase::CacheRoot::CacheFile
+   cache_file.to_s.should == "/tmp/0d/bf/50c256d6b6efe55d11d0b6b50600"
  end
  end
 
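Editor's note: the path asserted in the spec above can be recomputed by hand. `CacheRoot#cache_file` takes the SHA-256 hex digest of `"bucket/key"`, keeps the first 32 hex digits, and shards them into two two-character directory levels. A sketch reproducing the asserted value:

```ruby
require 'digest'

# Recomputes the sharded cache path used by CacheRoot#cache_file.
def cache_path(root, bucket, key)
  hex = Digest::SHA2.new.update("#{bucket}/#{key}").to_s[0, 32]
  File.join(root, *hex.match(/(..)(..)(.*)/).captures)
end

cache_path('/tmp', 'mybucket', 'hello/world.jpg')
# => "/tmp/0d/bf/50c256d6b6efe55d11d0b6b50600" (the value asserted above)
```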
@@ -190,13 +180,17 @@ else
    source_s3 "original_cached_public" bucket="#{ENV['AWS_S3_TEST_BUCKET']}" path="hash" cache-root="/tmp" public="true"
    source_s3 "original_cached_public2" bucket="#{ENV['AWS_S3_TEST_BUCKET']}" path="hash" cache-root="/tmp" public="true"
  }
+
+ post {
+   store_s3 "input" bucket="#{ENV['AWS_S3_TEST_BUCKET']}" path="hash" cache-root="/tmp"
+ }
  EOF
  end
 
  before do
-   @cached_object = Pathname.new('/tmp/ce/26/b196585e28aa99f55b1260b629e2')
-   @cached_object.dirname.mkpath
-   @cached_object.open('w') do |io|
+   @cache_file = Pathname.new('/tmp/ce/26/b196585e28aa99f55b1260b629e2')
+   @cache_file.dirname.mkpath
+   @cache_file.open('wb') do |io|
      header = MessagePack.pack(
        'private_url' => 'https://s3-eu-west-1.amazonaws.com/test/ghost.jpg?' + ENV['AWS_ACCESS_KEY_ID'],
        'public_url' => 'https://s3-eu-west-1.amazonaws.com/test/ghost.jpg',
@@ -208,7 +202,7 @@ else
    end
  end
 
- it 'should use cache when configured' do
+ it 'should use cache when configured and object in cache' do
    subject.handlers[0].sources[0].should be_a Configuration::S3Source
    subject.handlers[0].sources[0].realize(state)
 
@@ -240,64 +234,165 @@ else
    state.images['original'].source_url.should == 'https://s3-eu-west-1.amazonaws.com/test/ghost.jpg'
  end
 
- it 'shluld use object stored in S3 when not found in the cache' do
-   cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
-   cache_file.unlink if cache_file.exist?
+ describe 'read-through' do
+   it 'shluld use object stored in S3 when not found in the cache' do
+     cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
+     cache_file.unlink if cache_file.exist?
 
-   state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
-   subject.handlers[0].sources[0].realize(state)
+     state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
+     subject.handlers[0].sources[0].realize(state)
 
-   cache_file.should exist
- end
+     cache_file.should exist
+   end
 
- it 'should use cached object writen when it was initialy read' do
-   cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
-   cache_file.unlink if cache_file.exist?
+   it 'should write cache on read and be able to use it on next read' do
+     cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
+     cache_file.unlink if cache_file.exist?
 
-   state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
-   subject.handlers[0].sources[0].realize(state)
+     state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
+     subject.handlers[0].sources[0].realize(state)
 
-   cache_file.should exist
+     cache_file.should exist
 
-   subject.handlers[0].sources[1].realize(state)
+     subject.handlers[0].sources[1].realize(state)
 
-   state.images['original'].data.should == @test_data
-   state.images['original'].mime_type.should == 'image/jpeg'
+     state.images['original'].data.should == @test_data
+     state.images['original'].mime_type.should == 'image/jpeg'
 
-   state.images['original_cached'].data.should == @test_data
-   state.images['original_cached'].mime_type.should == 'image/jpeg'
- end
+     state.images['original_cached'].data.should == @test_data
+     state.images['original_cached'].mime_type.should == 'image/jpeg'
+   end
 
- it 'should use update cached object when new properties are read from S3' do
-   cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
-   cache_file.unlink if cache_file.exist?
+   it 'should update cached object with new properties read from S3' do
+     cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
+     cache_file.unlink if cache_file.exist?
 
-   state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
+     state = Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
 
-   ## cache with private URL
-   subject.handlers[0].sources[0].realize(state)
+     ## cache with private URL
+     subject.handlers[0].sources[0].realize(state)
 
-   cache_file.should exist
-   sum = Digest::SHA2.new.update(cache_file.read).to_s
+     cache_file.should exist
+     sum = Digest::SHA2.new.update(cache_file.read).to_s
 
-   ## read from cache with private URL
-   subject.handlers[0].sources[1].realize(state)
+     ## read from cache with private URL
+     subject.handlers[0].sources[1].realize(state)
 
-   # no change
-   Digest::SHA2.new.update(cache_file.read).to_s.should == sum
+     # no change
+     Digest::SHA2.new.update(cache_file.read).to_s.should == sum
 
-   ## read from cache; add public URL
-   subject.handlers[0].sources[2].realize(state)
+     ## read from cache; add public URL
+     subject.handlers[0].sources[2].realize(state)
 
-   # should get updated
-   Digest::SHA2.new.update(cache_file.read).to_s.should_not == sum
-
-   sum = Digest::SHA2.new.update(cache_file.read).to_s
-   ## read from cahce
-   subject.handlers[0].sources[3].realize(state)
+     # should get updated
+     Digest::SHA2.new.update(cache_file.read).to_s.should_not == sum
+
+     sum = Digest::SHA2.new.update(cache_file.read).to_s
+     ## read from cahce
+     subject.handlers[0].sources[3].realize(state)
 
-   # no change
-   Digest::SHA2.new.update(cache_file.read).to_s.should == sum
+     # no change
+     Digest::SHA2.new.update(cache_file.read).to_s.should == sum
+   end
+
+   describe 'error handling' do
+     let :state do
+       Configuration::RequestState.new('abc', {test_image: 'test.jpg'})
+     end
+
+     before :each do
+       @cache_file = Pathname.new('/tmp/af/a3/5eaf0a614693e2d1ed455ac1cedb')
+       @cache_file.dirname.mkpath
+       @cache_file.open('wb') do |io|
+         header = 'xyz'
+         io.write [header.length].pack('L') # header length
+         io.write header
+         io.write 'abc'
+       end
+     end
+
+     it 'should rewrite cached object when corrupted' do
+       subject.handlers[0].sources[0].realize(state)
+       state.images['original'].data.should == @test_data
+
+       cache = @cache_file.read.force_encoding('ASCII-8BIT')
+       cache.should_not include 'xyz'
+       cache.should include @test_data
+     end
+
+     it 'should use S3 object when cache file is not accessible' do
+       @cache_file.chmod(0000)
+       begin
+         subject.handlers[0].sources[0].realize(state)
+         state.images['original'].data.should == @test_data
+       ensure
+         @cache_file.chmod(0644)
+
+         cache = @cache_file.read.force_encoding('ASCII-8BIT')
+         cache.should include 'xyz'
+         cache.should_not include @test_data
+       end
+     end
+
+     it 'should use S3 object when cache direcotry is not accessible' do
+       @cache_file.dirname.chmod(0000)
+       begin
+         subject.handlers[0].sources[0].realize(state)
+         state.images['original'].data.should == @test_data
+       ensure
+         @cache_file.dirname.chmod(0755)
+
+         cache = @cache_file.read.force_encoding('ASCII-8BIT')
+         cache.should include 'xyz'
+         cache.should_not include @test_data
+       end
+     end
+
+     it 'should not store cache file for S3 objects that does not exist' do
+       cache_file = Pathname.new('/tmp/a2/fd/4261e9a7586ed772d0c78bb51c9d')
+       cache_file.unlink if cache_file.exist?
+
+       state = Configuration::RequestState.new('abc', {test_image: 'bogous.jpg'})
+
+       expect {
+         subject.handlers[0].sources[0].realize(state)
+       }.to raise_error Configuration::S3NoSuchKeyError
+
+       cache_file.should_not exist
+     end
+   end
+ end
+
+ describe 'write-through' do
+   let :state do
+     Configuration::RequestState.new(@test_data, {test_image: 'test_cache.jpg'})
+   end
+
+   before :each do
+   end
+
+   it 'should cache S3 object during write' do
+     cache_file = Pathname.new('/tmp/31/f6/d48147b9981bb880fb1861539e3f')
+     cache_file.unlink if cache_file.exist?
+
+     subject.handlers[1].sources[0].realize(state)
+     state.images['input'].mime_type = 'image/jpeg'
+     subject.handlers[1].stores[0].realize(state)
+
+     # we have cache
+     cache_file.should exist
+
+     # but delete S3 so it will fail if cache was not used fully
+     s3_client = AWS::S3.new(use_ssl: false)
+     s3_test_bucket = s3_client.buckets[ENV['AWS_S3_TEST_BUCKET']]
+     s3_test_bucket.objects['test_cache.jpg'].delete
+
+     state = Configuration::RequestState.new('', {test_image: 'test_cache.jpg'})
+     expect {
+       subject.handlers[0].sources[0].realize(state)
+     }.not_to raise_error
+     state.images['original'].data.should == @test_data
+   end
  end
  end
 
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: httpimagestore
  version: !ruby/object:Gem::Version
-   version: 1.3.0
+   version: 1.4.0
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-09-11 00:00:00.000000000 Z
+ date: 2013-10-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: unicorn-cuba-base
@@ -91,6 +91,22 @@ dependencies:
      - - ~>
        - !ruby/object:Gem::Version
          version: '0.9'
+ - !ruby/object:Gem::Dependency
+   name: msgpack
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '0.5'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '0.5'
  - !ruby/object:Gem::Dependency
    name: httpclient
    requirement: !ruby/object:Gem::Requirement
@@ -296,7 +312,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
        version: '0'
        segments:
        - 0
-       hash: 1883353100319806536
+       hash: -2918091045365099838
  required_rubygems_version: !ruby/object:Gem::Requirement
    none: false
    requirements: