concord_cacher 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest ADDED
@@ -0,0 +1,19 @@
1
+ README.textile
2
+ Rakefile
3
+ lib/concord_cacher.rb
4
+ lib/concord/cacher.rb
5
+ lib/concord/diy_local_cacher.rb
6
+ lib/concord/java_proxy_cacher.rb
7
+ spec/data/element_reference.otml
8
+ spec/data/empty.otml
9
+ spec/data/recursion.otml
10
+ spec/data/resources/chart_line.png
11
+ spec/data/resources/delete.png
12
+ spec/data/resources/recurse1.otml
13
+ spec/data/resources/recurse2.otml
14
+ spec/data/resources/text.txt
15
+ spec/data/standard_uri.otml
16
+ spec/diy_local_cacher_spec.rb
17
+ spec/helpers/cache_helper.rb
18
+ spec/java_proxy_cacher_spec.rb
19
+ Manifest
data/README.textile ADDED
@@ -0,0 +1,27 @@
1
+ h1. concord_cacher
2
+
3
+ The concord_cacher gem provides support for locally caching a resource and all referenced resources in multiple different ways.
4
+ It is intended for use with other Concord Consortium projects and not necessarily for outside projects.
5
+
6
+ h3. install
7
+
8
+ <pre><code>
9
+ $ gem install concord_cacher
10
+ </code></pre>
11
+
12
+ h3. example
13
+
14
+ <pre><code>
15
+ $ irb
16
+ > require 'rubygems'
17
+ > require 'concord_cacher'
18
+ >
19
+ > cacher = Concord::JavaProxyCacher.new(:url => "http://some/url/to/cache", :cache_dir => "/some/local/dir/to/store/things")
20
+ > cacher.cache
21
+ </code></pre>
22
+
23
+ h3. tests
24
+
25
+ <pre><code>
26
+ $ rake spec
27
+ </code></pre>
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
# Rake tasks for the concord_cacher gem: packaging metadata (via Echoe),
# the spec task, and the Hudson CI entry points.
require 'rubygems'

require 'rake'
require 'spec/rake/spectask'

require './lib/concord_cacher.rb'

require 'echoe'

# Packaging metadata for the gem.
Echoe.new('concord_cacher', '0.0.1') do |gem_config|
  gem_config.description = "concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects."
  gem_config.summary = "Support for locally caching a resource and all referenced resources in multiple different ways"
  gem_config.url = "http://github.com/psndcsrv/concord_cacher"
  gem_config.author = "Aaron Unger"
  gem_config.email = "aunger @nospam@ concord.org"
  gem_config.ignore_pattern = ["tmp/*","pkg/*"]
  gem_config.development_dependencies = []
end

# A bare `rake` runs the specs.
task :default => :spec

Spec::Rake::SpecTask.new do |spec_task|
  spec_task.spec_files = FileList["spec/**/*_spec.rb"]
end

namespace :hudson do
  # CI entry point: set up the reporter first, then run the normal spec task.
  task :spec => ["hudson:setup:rspec", 'rake:spec']

  namespace :setup do
    # Points ci_reporter at spec/reports/ and loads its rspec rake integration.
    task :pre_ci do
      ENV["CI_REPORTS"] = 'spec/reports/'
      gem 'ci_reporter'
      require 'ci/reporter/rake/rspec'
    end

    task :rspec => [:pre_ci, "ci:setup:rspec"]
  end
end
@@ -0,0 +1,32 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for concord_cacher 0.0.1.
Gem::Specification.new do |s|
  s.name = %q{concord_cacher}
  s.version = "0.0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Aaron Unger"]
  s.date = %q{2010-03-25}
  s.description = %q{concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects.}
  s.email = %q{aunger @nospam@ concord.org}
  s.extra_rdoc_files = ["README.textile", "lib/concord_cacher.rb", "lib/concord/cacher.rb", "lib/concord/diy_local_cacher.rb", "lib/concord/java_proxy_cacher.rb"]
  s.files = ["README.textile", "Rakefile", "lib/concord_cacher.rb", "lib/concord/cacher.rb", "lib/concord/diy_local_cacher.rb", "lib/concord/java_proxy_cacher.rb", "spec/data/element_reference.otml", "spec/data/empty.otml", "spec/data/recursion.otml", "spec/data/resources/chart_line.png", "spec/data/resources/delete.png", "spec/data/resources/recurse1.otml", "spec/data/resources/recurse2.otml", "spec/data/resources/text.txt", "spec/data/standard_uri.otml", "spec/diy_local_cacher_spec.rb", "spec/helpers/cache_helper.rb", "spec/java_proxy_cacher_spec.rb", "Manifest", "concord_cacher.gemspec"]
  s.homepage = %q{http://github.com/psndcsrv/concord_cacher}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Concord_cacher", "--main", "README.textile"]
  s.require_paths = ["lib"]
  # Guarded because not every RubyGems version provides this setter.
  s.rubyforge_project = %q{concord_cacher} if s.respond_to?(:rubyforge_project=)
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Support for locally caching a resource and all referenced resources in multiple different ways}

  # The signing material lives at developer-specific absolute paths. Only
  # request a signed build when both files are actually present, so the
  # gemspec stays usable on any other machine.
  public_cert = "/Users/aunger/gem-public_cert.pem"
  private_key = %q{/Users/aunger/gem-private_key.pem}
  if File.exist?(public_cert) && File.exist?(private_key)
    s.cert_chain = [public_cert]
    s.signing_key = private_key
  end

  # The original wrapped this in an empty if/else on Gem::RubyGemsVersion;
  # that conditional did nothing and the constant is not defined by current
  # RubyGems, so it has been dropped.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
@@ -0,0 +1,234 @@
1
# Abstract base class for the Concord cachers.
#
# Given a URL (or local path) to a document, it reads the document, scans every
# line for references to other resources (absolute http/https URLs, src/href/
# imageBytes attributes and, inside .nlogo files, import-drawing statements),
# fetches each referenced resource and stores it under +cache_dir+.
# otml/cml/mml/nlogo resources are scanned recursively; html resources are
# scanned one level deep.
#
# Subclasses must implement +generate_main_filename+, +generate_filename+ and
# +generate_uuid+, which decide how cached files are named.
class ::Concord::Cacher
  require 'rubygems'
  require 'open-uri'
  require 'cgi'
  require 'timeout'
  require 'rexml/document'

  DEBUG = false

  # scan for anything that matches (http://[^'"]+)
  URL_REGEX = /(http[s]?:\/\/[^'"]+)/i
  # the imageBytes can be referenced by a OTImage object
  SRC_REGEX = /(?:src|href|imageBytes)[ ]?=[ ]?['"]([^'"]+)/i
  NLOGO_REGEX = /import-drawing "([^"]+)"/i
  ALWAYS_SKIP_REGEX = /^(mailto|jres)/i   # (resourceFile =~ /^mailto/) || (resourceFile =~ /^jres/)
  RECURSE_ONCE_REGEX = /html$/i   # (resourceFile =~ /otml$/ || resourceFile =~ /html/)
  RECURSE_FOREVER_REGEX = /(otml|cml|mml|nlogo)$/i

  attr_reader :otml_url, :cache_dir, :uuid, :errors

  # Reads the main document and prepares the cacher.
  #
  # opts:
  #   :url           (required) URL or local path of the document to cache
  #   :cache_dir     (required) directory prefix for cached files; it is
  #                  concatenated directly with file names, so it should end
  #                  with a path separator
  #   :rewrite_urls  replace references in the cached content with the local
  #                  file names (default false)
  #   :verbose       print progress markers (default false)
  #   :cache_headers write a ".hdrs" property file next to each cached file
  #                  (default true)
  #   :create_map    write/merge url_map.xml in the cache dir (default true)
  #
  # Raises ArgumentError when :url or :cache_dir is missing.
  def initialize(opts = {})
    defaults = {:rewrite_urls => false, :verbose => false, :cache_headers => true, :create_map => true}
    opts = defaults.merge(opts)
    raise ArgumentError, "Must include :url, and :cache_dir in the options hash." unless opts[:url] && opts[:cache_dir]
    @rewrite_urls = opts[:rewrite_urls]
    @cache_dir = opts[:cache_dir]
    @verbose = opts[:verbose]
    @cache_headers = opts[:cache_headers]
    @create_map = opts[:create_map]
    url = opts[:url]
    @filename = File.basename(url, ".otml")
    @content = ""
    open_location(url) do |r|
      # Only HTTP responses carry meta/status; local files get synthetic headers.
      @content_headers = r.respond_to?("meta") ? r.meta : {}
      @content_headers['_http_version'] = "HTTP/1.1 #{r.respond_to?("status") ? r.status.join(" ") : "200 OK"}"
      @content = r.read
    end
    @uuid = generate_uuid
    if URI.parse(url).kind_of?(URI::HTTP)
      @otml_url = url
    elsif @content =~ /<otrunk[^>]+codebase[ ]?=[ ]?['"]([^'"]+)/
      # this probably references something on the local fs. we need to extract
      # the document's codebase, if there is one
      @otml_url = "#{$1}"
      @content.sub!(/codebase[ ]?=[ ]?['"][^'"]+['"]/,"")
    else
      @otml_url = url
    end

    # Keep only the directory part. Non-destructive on purpose: @otml_url may
    # be the very string object the caller passed in as :url (possibly frozen).
    @otml_url = @otml_url.sub(/[^\/]+$/,"")

    @errors = {}

    @url_to_hash_map = {}
  end

  # Caches the main document and everything it references, then (when
  # :create_map is enabled) writes the url -> cached-file map.
  def cache
    copy_otml_to_local_cache

    write_url_to_hash_map if @create_map
  end

  # Name of the cached copy of the main document. Subclass responsibility.
  def generate_main_filename
    raise NotImplementedError, "You should be using this class through one of its sub-classes!"
  end

  # Name of the cached copy of a referenced resource. Subclass responsibility.
  # parse_file calls it with :content (the fetched body) and :url (a URI).
  def generate_filename(opts = {})
    raise NotImplementedError, "You should be using this class through one of its sub-classes!"
  end

  # Identifier exposed through #uuid. Subclass responsibility.
  def generate_uuid
    raise NotImplementedError, "You should be using this class through one of its sub-classes!"
  end

  # Parses the main document, caches its references, writes the (possibly
  # rewritten) document and its headers, and reports collected errors when
  # verbose.
  def copy_otml_to_local_cache
    # save the file in the local server directories
    filename = generate_main_filename

    # open the otml file from the specified url or grab the embedded content
    uri = URI.parse(@otml_url)
    if uri.relative?
      # we need the main URI to be absolute so that we can use it to resolve references
      file_root = URI.parse("file:///")
      uri = file_root.merge(uri)
    end
    @content = parse_file("#{@cache_dir}#{@filename}", @content, @cache_dir, uri, true)

    write_resource(@cache_dir + filename, @content)
    write_property_map(@cache_dir + filename + ".hdrs", @content_headers) if @cache_headers
    @url_to_hash_map[@otml_url + @filename + ".otml"] = filename

    return unless @verbose

    puts "\nThere were #{@errors.length} artifacts with errors.\n"
    @errors.each do |k,v|
      puts "In #{k}:"
      v.uniq.each do |e|
        puts "    #{e}"
      end
    end
  end

  # Scans +content+ line by line for resource references, fetches and caches
  # each one, and returns the processed content (with references replaced by
  # local file names when :rewrite_urls is on).
  #
  # orig_filename - name of the document being parsed (only its basename is
  #                 used, for progress output and .nlogo detection)
  # content       - document text
  # cache_dir     - directory prefix cached files are written under
  # parent_url    - URI used to resolve relative references and as the key
  #                 under which errors are recorded
  # recurse       - whether referenced otml/html-like resources are parsed too
  def parse_file(orig_filename, content, cache_dir, parent_url, recurse)
    # File.basename also copes with names that contain no "/" at all.
    short_filename = File.basename(orig_filename)
    print "\n#{short_filename}: " if @verbose
    processed_lines = []
    content.split("\n").each do |line|
      line = CGI.unescapeHTML(line)
      # Start offsets of matches already handled on this line; a match whose
      # offset is in here is not processed again.
      match_indexes = []
      while ( ((match = URL_REGEX.match(line)) && (! match_indexes.include?(match.begin(1)))) ||
              ((match = SRC_REGEX.match(line)) && (! match_indexes.include?(match.begin(1)))) ||
              (/.*\.nlogo/.match(short_filename) && (match = NLOGO_REGEX.match(line)) && (! match_indexes.include?(match.begin(1)))) )
        print "\nMatched url: #{match[1]}: " if DEBUG
        match_indexes << match.begin(1)
        # get the resource from that location, save it locally
        # (whitespace is removed and everything from the first query/fragment/
        # delimiter character onwards is dropped)
        match_url = match[1].gsub(/\s+/,"").gsub(/[\?\#&;=\+\$,<>"\{\}\|\\\^\[\]].*$/,"")
        # puts("pre: #{match[1]}, post: #{match_url}") if DEBUG
        begin
          resource_url = URI.parse(CGI.unescapeHTML(match_url))
        rescue StandardError
          record_error(parent_url, "Bad URL: '#{CGI.unescapeHTML(match_url)}', skipping.")
          print 'x' if @verbose
          next
        end
        if resource_url.relative?
          # relative URL's need to have their parent document's codebase appended before trying to download
          resource_url = parent_url.merge(resource_url.to_s)
        end
        resourceFile = match_url
        resourceFile = resourceFile.gsub(/http[s]?:\/\//,"")
        resourceFile = resourceFile.gsub(/\/$/,"")

        if (resourceFile.length < 1) || ALWAYS_SKIP_REGEX.match(resourceFile)
          print "S" if @verbose
          next
        end

        begin
          resource_content = ""
          resource_headers = {}
          open_location(resource_url.scheme == 'file' ? resource_url.path : resource_url.to_s) do |r|
            resource_headers = r.respond_to?("meta") ? r.meta : {}
            resource_headers['_http_version'] = "HTTP/1.1 #{r.respond_to?("status") ? r.status.join(" ") : "200 OK"}"
            resource_content = r.read
          end
        rescue OpenURI::HTTPError, Timeout::Error, Errno::ENOENT => e
          record_error(parent_url, "Problem getting file: #{resource_url.to_s}, Error: #{e}")
          print 'X' if @verbose
          next
        end

        localFile = generate_filename(:content => resource_content, :url => resource_url)
        @url_to_hash_map[resource_url.to_s] = localFile
        line.sub!(match_url.to_s,localFile.to_s) if @rewrite_urls

        # skip downloading already existing files.
        # because we're working with sha1 hashes we can be reasonably certain the content is a complete match
        if File.exist?(cache_dir + localFile)
          print 's' if @verbose
        else
          # if it's an otml/html file, we should parse it too (only one level down)
          if (recurse && (RECURSE_ONCE_REGEX.match(resourceFile) || RECURSE_FOREVER_REGEX.match(resourceFile)))
            puts "recursively parsing '#{resource_url.to_s}'" if DEBUG
            recurse_further = RECURSE_FOREVER_REGEX.match(resourceFile) ? true : false
            begin
              resource_content = parse_file(cache_dir + resourceFile, resource_content, cache_dir, resource_url, recurse_further)
            rescue OpenURI::HTTPError => e
              record_error(parent_url, "Problem getting or writing file: #{resource_url.to_s}, Error: #{e}")
              print 'X' if @verbose
              next
            end
          end
          begin
            write_resource(cache_dir + localFile, resource_content)
            write_property_map(cache_dir + localFile + ".hdrs", resource_headers) if @cache_headers
            print "." if @verbose
          rescue StandardError => e
            # StandardError rather than Exception, so interrupts and exit
            # requests are not swallowed while writing.
            record_error(parent_url, "Problem getting or writing file: #{resource_url.to_s}, Error: #{e}")
            print 'X' if @verbose
          end
        end
      end
      processed_lines << line
    end

    print ".\n" if @verbose
    return processed_lines.join("\n")
  end

  # Writes +content+ to +filename+, replacing any existing file. The block form
  # closes the handle even when the write raises.
  def write_resource(filename, content)
    File.open(filename, "w") do |f|
      f.write(content)
    end
  end

  # Writes url_map.xml into the cache dir, first merging in the entries of an
  # existing map so earlier runs are not lost.
  def write_url_to_hash_map
    load_existing_map if File.exist?(@cache_dir + "url_map.xml")
    write_property_map(@cache_dir + "url_map.xml", @url_to_hash_map)
  end

  # Writes +hash_map+ to +filename+ in the Java XML properties format.
  # Keys and values are both HTML-escaped so that values containing "&" or "<"
  # still produce well-formed XML.
  def write_property_map(filename, hash_map)
    File.open(filename, "w") do |f|
      f.write('<?xml version="1.0" encoding="UTF-8"?>' + "\n")
      f.write('<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">' + "\n")
      f.write('<properties>' + "\n")
      hash_map.each do |url,hash|
        f.write("<entry key='#{CGI.escapeHTML(url.to_s)}'>#{CGI.escapeHTML(hash.to_s)}</entry>\n")
      end
      f.write('</properties>' + "\n")
    end
  end

  # Merges the entries of the existing url_map.xml into the in-memory map.
  # Entries recorded during this run win over previously stored ones.
  def load_existing_map
    map_content = File.open(@cache_dir + "url_map.xml") do |f|
      ::REXML::Document.new(f).root
    end
    map_content.elements.each("entry") do |entry|
      k = entry.attributes["key"]
      next if @url_to_hash_map.key?(k)
      @url_to_hash_map[k] = entry.text
      # puts "Adding previously defined url: #{k} => #{val}" if DEBUG
    end
  end

  private

  # Opens a URL or local path and yields the IO.
  # Uses URI.open where open-uri provides it, because newer Rubies no longer
  # route Kernel#open through open-uri; older Rubies fall back to Kernel#open.
  # NOTE(review): both forms hand non-URL strings to Kernel#open, so the
  # location should come from a trusted source.
  def open_location(location, &block)
    if URI.respond_to?(:open)
      URI.open(location, &block)
    else
      open(location, &block)
    end
  end

  # Records +message+ in #errors under the document it occurred in.
  def record_error(parent_url, message)
    (@errors[parent_url] ||= []) << message
  end
end
@@ -0,0 +1,31 @@
1
# Cacher that names cached files after the SHA1 of their URL (or of their
# path, for file: URLs) and names the main document after the uuid of the
# supplied activity. By default it rewrites references in the cached content
# and writes neither header files nor a url map.
class ::Concord::DiyLocalCacher < ::Concord::Cacher
  require 'uri'
  require 'digest/sha1'

  # opts - same options as Concord::Cacher#initialize, plus:
  #   :activity (required) object responding to #uuid
  #
  # Raises ArgumentError when :activity is missing.
  def initialize(opts = {})
    raise ArgumentError, "Must include :activity in the options hash." unless opts[:activity]
    @activity = opts[:activity]
    # Work on a copy so the caller's hash is not modified.
    opts = opts.dup
    opts[:cache_headers] ||= false
    opts[:create_map] ||= false
    # NOTE: a falsy :rewrite_urls is replaced with true here, so rewriting is
    # always on for this cacher.
    opts[:rewrite_urls] ||= true
    super(opts)
  end

  # The main document is stored as "<activity uuid>.otml".
  def generate_main_filename
    "#{generate_uuid}.otml"
  end

  def generate_uuid
    @activity.uuid
  end

  # Returns the SHA1 hex digest of the resource's URL; for file: URIs the
  # digest is taken over the path only.
  #
  # Raises ArgumentError when :url is missing.
  def generate_filename(opts = {})
    raise ArgumentError, "Must include :url key in opts" unless opts[:url]
    url = opts[:url]
    url = url.path if url.kind_of?(::URI) && url.scheme == 'file'
    ::Digest::SHA1.hexdigest(url.to_s)
  end
end
@@ -0,0 +1,16 @@
1
# Cacher that names every cached file, including the main document, after the
# SHA1 hex digest of its content.
class ::Concord::JavaProxyCacher < ::Concord::Cacher
  require 'digest/sha1'

  # The main document is stored under the digest of its content.
  def generate_main_filename
    generate_filename(:content => @content)
  end

  # The uuid is the digest of the main document's content.
  def generate_uuid
    generate_filename(:content => @content)
  end

  # Returns the SHA1 hex digest of opts[:content].
  #
  # Raises ArgumentError when :content is missing.
  def generate_filename(opts = {})
    raise ArgumentError, "Must include :content key in opts" unless opts[:content]
    ::Digest::SHA1.hexdigest(opts[:content])
  end
end
@@ -0,0 +1,8 @@
1
# Entry point of the gem: puts this directory on the load path (unless it is
# already there, either as-is or in expanded form) and loads the cachers.
lib_dir = File.dirname(__FILE__)
already_on_path = $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
$:.unshift(lib_dir) unless already_on_path

module Concord
  # Order matters: the base class must be loaded before its subclasses.
  %w[concord/cacher concord/diy_local_cacher concord/java_proxy_cacher].each do |feature|
    require feature
  end
end
@@ -0,0 +1,22 @@
1
+ <otrunk id="67daad67-8762-4f5a-8c7c-50b7c3544c0b">
2
+ <imports>
3
+ <import class="org.concord.otrunk.OTSystem" />
4
+ </imports>
5
+ <objects>
6
+ <OTSystem local_id="system">
7
+ <root>
8
+ <OTText>
9
+ <!-- Absolute references -->
10
+ <a href="http://loops.diy.concord.org/">Visit the Concord Website</a>
11
+ <img src="http://portal.concord.org/images/icons/chart_bar.png" />
12
+ <OTBlob imageBytes="http://portal.concord.org/images/icons/chart_pie.png" />
13
+
14
+ <!-- relative references -->
15
+ <a href="resources/text.txt">Read some text</a>
16
+ <img src="resources/delete.png" />
17
+ <OTBlob imageBytes="resources/chart_line.png" />
18
+ </OTText>
19
+ </root>
20
+ </OTSystem>
21
+ </objects>
22
+ </otrunk>
@@ -0,0 +1,15 @@
1
+ <!--
2
+ This is a pseudo-otml file which holds all sorts of ways to reference a file.
3
+ It won't actually load in OTrunk, and is only really useful to test positive results -
4
+ i.e. that the cacher correctly finds and caches all of the various urls represented here.
5
+ It won't allow us to detect if the cacher is caching unexpected URLS.
6
+ -->
7
+ <otrunk id="de74ee26-53a6-4edd-b4b1-2f2c5ac7ec21">
8
+ <imports>
9
+ <import class="org.concord.otrunk.OTSystem" />
10
+ </imports>
11
+ <objects>
12
+ <OTSystem local_id="system">
13
+ </OTSystem>
14
+ </objects>
15
+ </otrunk>
@@ -0,0 +1,14 @@
1
+ <otrunk id="6b230c1d-8816-4ef3-9c7a-b84a0da0b35f">
2
+ <imports>
3
+ <import class="org.concord.otrunk.OTSystem" />
4
+ </imports>
5
+ <objects>
6
+ <OTSystem local_id="system">
7
+ <root>
8
+ <OTText>
9
+ <a href="resources/recurse1.otml">Recursive otml</a>
10
+ </OTText>
11
+ </root>
12
+ </OTSystem>
13
+ </objects>
14
+ </otrunk>
Binary file
Binary file
@@ -0,0 +1,15 @@
1
+ <otrunk id="6b230c1d-8816-4ef3-9c7a-b84a0da0b35f">
2
+ <imports>
3
+ <import class="org.concord.otrunk.OTSystem" />
4
+ </imports>
5
+ <objects>
6
+ <OTSystem local_id="system">
7
+ <root>
8
+ <OTText>
9
+ <a href="recurse2.otml">Recursive otml</a>
10
+ <img src="delete.png" />
11
+ </OTText>
12
+ </root>
13
+ </OTSystem>
14
+ </objects>
15
+ </otrunk>
@@ -0,0 +1,14 @@
1
+ <otrunk id="6b230c1d-8816-4ef3-9c7a-b84a0da0b35f">
2
+ <imports>
3
+ <import class="org.concord.otrunk.OTSystem" />
4
+ </imports>
5
+ <objects>
6
+ <OTSystem local_id="system">
7
+ <root>
8
+ <OTText>
9
+ <OTBlob imageBytes="chart_line.png" />
10
+ </OTText>
11
+ </root>
12
+ </OTSystem>
13
+ </objects>
14
+ </otrunk>
@@ -0,0 +1 @@
1
+ This is some text.
@@ -0,0 +1,17 @@
1
+ <otrunk id="6b230c1d-8816-4ef3-9c7a-b84a0da0b35f">
2
+ <imports>
3
+ <import class="org.concord.otrunk.OTSystem" />
4
+ </imports>
5
+ <objects>
6
+ <OTSystem local_id="system">
7
+ <root>
8
+ <OTText>
9
+ <!-- normal http -->
10
+ http://portal.concord.org/images/icons/delete.png
11
+ <!-- https -->
12
+ https://mail.google.com/mail/images/2/5/mountains/base/gmail_solid_white.png
13
+ </OTText>
14
+ </root>
15
+ </OTSystem>
16
+ </objects>
17
+ </otrunk>
@@ -0,0 +1,225 @@
1
# Specs for Concord::DiyLocalCacher.
begin
  require 'concord_cacher'
rescue LoadError
  # Gem not installed: fall back to the in-tree library entry point.
  require File.join(File.dirname(__FILE__), '..', 'lib', 'concord_cacher.rb')
end

require File.join(File.dirname(__FILE__), 'helpers', 'cache_helper.rb')

require 'fileutils'
require 'digest/sha1'

include FileUtils

# Test-only: make VERIFY_PEER an alias of VERIFY_NONE so the https fixture
# can be fetched without certificate verification.
require 'openssl'
module OpenSSL
  module SSL
    remove_const :VERIFY_PEER
  end
end
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE

# Guarded so that loading several spec files in one run does not redefine it.
SPEC_ROOT = File.expand_path(File.dirname(__FILE__)) unless defined?(SPEC_ROOT)

describe 'DIY Local Cacher' do
  include CacheHelper

  before(:each) do
    @klass = Concord::DiyLocalCacher
    @cache = File.join(SPEC_ROOT, '..', 'tmp', 'diy_local')
    mkdir_p(@cache)
    @cache += '/'
  end

  after(:each) do
    rm_rf(@cache)
  end

  # Stand-in for the activity object the cacher takes its uuid from.
  def mockup(file)
    mock('activity', {:uuid => 'hash', :url => file})
  end

  # Caches a fixture from spec/data with a mock activity attached.
  def cache_fixture(file)
    cache(file, :activity => mockup(file))
  end

  def sha1(text)
    ::Digest::SHA1.hexdigest(text)
  end

  # Expected cache name of a fixture referenced relative to spec/data.
  def local_sha1(relative_path)
    sha1(File.join(SPEC_ROOT, 'data', relative_path))
  end

  # Asserts that every file in +files+ exists in the cache.
  def all_exist(files)
    files.each { |f| exists?(f) }
  end

  # Asserts that +cached_file+ no longer mentions any of +original_refs+ and
  # mentions all of +rewritten_refs+. References are matched literally.
  def should_be_rewritten(cached_file, original_refs, rewritten_refs)
    file_content = File.read(File.join(@cache, cached_file))

    original_refs.each do |ref|
      file_content.should_not match(Regexp.new(Regexp.escape(ref)))
    end

    rewritten_refs.each do |ref|
      file_content.should match(Regexp.new(Regexp.escape(ref)))
    end
  end

  describe 'empty otml' do
    it 'should not create a url map xml file' do
      cache_fixture('empty.otml')
      does_not_exist?('url_map.xml')
    end

    it 'should create a cached file of the original url' do
      cache_fixture('empty.otml')
      exists?('hash.otml')
    end

    it 'should not create a cached header of the original url' do
      cache_fixture('empty.otml')
      does_not_exist?('hash.otml.hdrs')
    end
  end

  describe 'standard uri syntax' do
    it 'should cache 2 referenced files' do
      expected_files = [
        'hash.otml', # standard_uri.otml
        sha1('http://portal.concord.org/images/icons/delete.png'),
        sha1('https://mail.google.com/mail/images/2/5/mountains/base/gmail_solid_white.png')
      ]

      cache_fixture('standard_uri.otml')

      cache_size.should == 3
      all_exist(expected_files)
    end

    it 'should rewrite the urls in the main otml file' do
      cache_fixture('standard_uri.otml')

      file_content = File.read(File.join(@cache, 'hash.otml'))

      file_content.should_not match(/http:/)
      file_content.should match(sha1('http://portal.concord.org/images/icons/delete.png'))
      file_content.should match(sha1('https://mail.google.com/mail/images/2/5/mountains/base/gmail_solid_white.png'))
    end
  end

  describe 'element references syntax' do
    it 'should cache 6 referenced files' do
      expected_files = [
        'hash.otml', # element_reference.otml
        sha1('http://loops.diy.concord.org/'),
        sha1('http://portal.concord.org/images/icons/chart_bar.png'),
        sha1('http://portal.concord.org/images/icons/chart_pie.png'),
        local_sha1(File.join('resources', 'text.txt')),
        local_sha1(File.join('resources', 'delete.png')),
        local_sha1(File.join('resources', 'chart_line.png'))
      ]

      cache_fixture('element_reference.otml')

      cache_size.should == 7
      all_exist(expected_files)
    end

    it 'should rewrite the urls in the main otml file' do
      unexpected_urls = [
        'http://loops.diy.concord.org/',
        'http://portal.concord.org/images/icons/chart_bar.png',
        'http://portal.concord.org/images/icons/chart_pie.png',
        File.join('resources', 'text.txt'),
        File.join('resources', 'delete.png'),
        File.join('resources', 'chart_line.png')
      ]

      # Remote references hash the URL itself; local ones hash the full path.
      expected_urls = unexpected_urls.map do |url|
        url =~ /^http/ ? sha1(url) : local_sha1(url)
      end

      cache_fixture('element_reference.otml')

      should_be_rewritten('hash.otml', unexpected_urls, expected_urls)
    end
  end

  describe 'recursive references' do
    it 'should cache 4 referenced files in otml files' do
      expected_files = [
        'hash.otml', # recursion.otml
        local_sha1(File.join('resources', 'recurse1.otml')),
        local_sha1(File.join('resources', 'delete.png')),
        local_sha1(File.join('resources', 'recurse2.otml')),
        local_sha1(File.join('resources', 'chart_line.png'))
      ]

      cache_fixture('recursion.otml')

      cache_size.should == 5
      all_exist(expected_files)
    end

    it 'should rewrite urls in first level recursion otml' do
      recurse_otml = local_sha1(File.join('resources', 'recurse1.otml'))

      unexpected_urls = [
        File.join('resources', 'recurse2.otml'),
        File.join('resources', 'delete.png')
      ]
      expected_urls = unexpected_urls.map { |url| local_sha1(url) }

      cache_fixture('recursion.otml')

      should_be_rewritten(recurse_otml, unexpected_urls, expected_urls)
    end

    it 'should rewrite urls in second level recursion otml' do
      recurse_otml = local_sha1(File.join('resources', 'recurse2.otml'))

      unexpected_urls = [File.join('resources', 'chart_line.png')]
      expected_urls = unexpected_urls.map { |url| local_sha1(url) }

      cache_fixture('recursion.otml')

      should_be_rewritten(recurse_otml, unexpected_urls, expected_urls)
    end
  end

  describe 'embedded nlogo files' do
    it 'should correctly download resources referenced from within netlogo model files'
  end

  describe 'embedded mw files' do
    it 'should correctly download resources referenced from within mw model files'
  end

  describe 'never cache' do
    it 'should always skip mailto and jres references'
  end

  describe 'recursion limits' do
    it 'should only recurse html files once'
    it 'should recurse otml,cml,mml and nlogo files forever'
  end
end
@@ -0,0 +1,21 @@
1
# Shared helpers for the cacher specs. The including example group is expected
# to set @klass (the cacher class under test) and @cache (the cache directory).
module CacheHelper
  # Runs the cacher under test against a fixture in spec/data.
  # +opts+ is merged over the default options passed to the cacher.
  def cache(file, opts = {})
    options = {:url => File.join(SPEC_ROOT,'data',file), :cache_dir => @cache, :verbose => false}.merge(opts)
    cacher = @klass.new(options)
    cacher.cache
  end

  # Expectation: +file+ exists in the cache directory.
  # Checks File.exist? directly, because the `be_exists` predicate matcher
  # dispatches to File.exists?, which newer Rubies no longer provide.
  def exists?(file)
    File.exist?(File.join(@cache, file)).should be_true
  end

  # Expectation: +file+ does not exist in the cache directory.
  def does_not_exist?(file)
    File.exist?(File.join(@cache, file)).should be_false
  end

  # Number of entries directly inside the cache directory.
  def cache_size
    Dir.glob(File.join(@cache, '*')).size
  end
end
@@ -0,0 +1,138 @@
1
# Specs for Concord::JavaProxyCacher.
begin
  require 'concord_cacher'
rescue LoadError
  # Gem not installed: fall back to the in-tree library entry point.
  require File.join(File.dirname(__FILE__), '..', 'lib', 'concord_cacher.rb')
end

require File.join(File.dirname(__FILE__), 'helpers', 'cache_helper.rb')

require 'fileutils'
require 'digest/sha1'

include FileUtils

# Test-only: make VERIFY_PEER an alias of VERIFY_NONE so the https fixture
# can be fetched without certificate verification.
require 'openssl'
module OpenSSL
  module SSL
    remove_const :VERIFY_PEER
  end
end
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE

# Guarded so that loading several spec files in one run does not redefine it.
SPEC_ROOT = File.expand_path(File.dirname(__FILE__)) unless defined?(SPEC_ROOT)

describe 'Java Proxy Cacher' do
  include CacheHelper

  before(:each) do
    @klass = Concord::JavaProxyCacher
    @cache = File.join(SPEC_ROOT, "..", 'tmp', 'java_proxy')
    mkdir_p(@cache)
    @cache += '/'
  end

  after(:each) do
    rm_rf(@cache)
  end

  # Content digest of a fixture in spec/data, i.e. its expected cache name.
  def fixture_sha1(file)
    ::Digest::SHA1.hexdigest(File.read(File.join(SPEC_ROOT, 'data', file)))
  end

  # Every cached file is accompanied by a ".hdrs" file, and one url_map.xml is
  # written per cache directory.
  def with_headers_and_map(content_files)
    content_files + content_files.collect { |f| f + ".hdrs" } + ['url_map.xml']
  end

  # Asserts that every file in +files+ exists in the cache.
  def all_exist(files)
    files.each { |f| exists?(f) }
  end

  describe 'empty otml' do
    it 'should create a url map xml file' do
      cache('empty.otml')
      exists?('url_map.xml')
    end

    it 'should create a cached file of the original url' do
      expected_filename = fixture_sha1('empty.otml')
      cache('empty.otml')
      exists?(expected_filename)
    end

    it 'should create a cached header of the original url' do
      expected_filename = fixture_sha1('empty.otml')
      cache('empty.otml')
      exists?("#{expected_filename}.hdrs")
    end
  end

  describe 'standard uri syntax' do
    it 'should cache 2 referenced files' do
      expected_files = with_headers_and_map([
        'e954312036485d3ca1894265922d9bd9491bf59e', # standard_uri.otml
        '8f0ebcb45d7ba71a541d4781329f4a6900c7ee65', # http://portal.concord.org/images/icons/delete.png
        '21b8b442e4449f642fcbd6796f4f0f937ec6c70d'  # https://mail.google.com/mail/images/2/5/mountains/base/gmail_solid_white.png
      ])

      cache('standard_uri.otml')

      cache_size.should == 7
      all_exist(expected_files)
    end
  end

  describe 'element references syntax' do
    it 'should cache 6 referenced files' do
      expected_files = with_headers_and_map([
        '9f945e576290efa874842b4ee07ab437d9d94a67', # element_reference.otml
        'd9a2565586307e2924c953dfe788154749e93799', # http://loops.diy.concord.org/
        '4e9576a56db3d142113b8905d7aa93e31c9f441b', # http://portal.concord.org/images/icons/chart_bar.png
        '41f082b7e69a399679a47acfdcd7e7a204e49745', # http://portal.concord.org/images/icons/chart_pie.png
        'cbe7ac86926fd3b8aa8659842a1d8c299d8966a7', # resources/text.txt
        '8f0ebcb45d7ba71a541d4781329f4a6900c7ee65', # resources/delete.png
        'd1cea238486aeeba9215d56bf71efc243754fe48'  # resources/chart_line.png
      ])

      cache('element_reference.otml')

      cache_size.should == 15
      all_exist(expected_files)
    end
  end

  describe 'recursive references' do
    it 'should cache 4 referenced files in otml files' do
      expected_files = with_headers_and_map([
        'dbbd46b446a205047cfbf32e7af350a73c38848d', # recursion.otml
        'cdc3d425b0ac9c3e89e1b79e0ad8a07c09bcedbd', # resources/recurse1.otml
        '8f0ebcb45d7ba71a541d4781329f4a6900c7ee65', # resources/delete.png
        '10f39c75f40386e8fbbb9320b6e77f3bd12b0f1d', # resources/recurse2.otml
        'd1cea238486aeeba9215d56bf71efc243754fe48'  # resources/chart_line.png
      ])

      cache('recursion.otml')

      cache_size.should == 11
      all_exist(expected_files)
    end
  end

  describe 'embedded nlogo files' do
    it 'should correctly download resources referenced from within netlogo model files'
  end

  describe 'embedded mw files' do
    it 'should correctly download resources referenced from within mw model files'
  end

  describe 'never cache' do
    it 'should always skip mailto and jres references'
  end

  describe 'recursion limits' do
    it 'should only recurse html files once'
    it 'should recurse otml,cml,mml and nlogo files forever'
  end
end
data.tar.gz.sig ADDED
@@ -0,0 +1 @@
1
+ ��j��oZ=y��/��A���d@��ǃ($ ��*] >Zv�r�� � �&V�c��Csأ���n��f�'���9�yw�d��G�������.��E�����âf�I�J�єe)V��N[1�
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: concord_cacher
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Aaron Unger
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain:
16
+ - |
17
+ -----BEGIN CERTIFICATE-----
18
+ MIIDMjCCAhqgAwIBAgIBADANBgkqhkiG9w0BAQUFADA/MQ8wDQYDVQQDDAZhdW5n
19
+ ZXIxFzAVBgoJkiaJk/IsZAEZFgdjb25jb3JkMRMwEQYKCZImiZPyLGQBGRYDb3Jn
20
+ MB4XDTEwMDMyNTE1NDM0MloXDTExMDMyNTE1NDM0MlowPzEPMA0GA1UEAwwGYXVu
21
+ Z2VyMRcwFQYKCZImiZPyLGQBGRYHY29uY29yZDETMBEGCgmSJomT8ixkARkWA29y
22
+ ZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMcObwOz2qlPRc/6PVHq
23
+ 7rrPxt+Ltc9shUtXrIjD0mQMqZtln6TkFnilPAXUd/NbxLrvZqrpPL5GeE5SlKA6
24
+ Hy2J5cj1A0c3qHmskaPnVma6NpnHmlWkIj1u//yCjz2ktaFYV7dHN2IlRK9M8t7F
25
+ Wt8DeVclSOeq3jOhjoOsSX5rIN8/MsWZcq42deHHiHQ0R5htLb1rmZTFKJynKA+m
26
+ Is6vytLGO/2iDeuiW8FQxBUwDEc7LNpfANCNk4WudUGi6K1WeteqTpyzJcso2yVZ
27
+ aMLpviD8WUxUZahz226ThwVLoTJB70GM8BVOFXApNdvbM0b45d1neyeHyU2f2ZeF
28
+ 9g8CAwEAAaM5MDcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAwHQYDVR0OBBYEFGQn
29
+ 5Lsh1f/5AryTzU2cmoKsh6XqMA0GCSqGSIb3DQEBBQUAA4IBAQAfrvFWSUZPQnWg
30
+ 5JxdAaHDMdVRlJGOTTpL/2L9GcDr8jaG32AOMcRDzhcCwKE5oBQZixUCidcvlh1y
31
+ 8diuKpwKZrFtZT3RVwZm3wNHk9TaSprCJkhjhATLWZ2h9bGoTA2FH7VLpacUcG9r
32
+ L7W8Y8jL7G4+TZYCLcnH5O3XQ7AkAYVYogt4DLYN9Ma7xwN3MAFfuCRJhY49XLYd
33
+ Wv+hK/ewN+n0Uub+jPpW1Ahi7Ah67RVIRpsWfNpEjx+CB4hPY2GwjpmIKhgt4Fyg
34
+ c90zsqVWrMWBRgRiSl0yYsmuGSPmllNn2m9Vq4ZAHpjCBv5E0AObBv0g/WQgiMjX
35
+ 8kT2T2VF
36
+ -----END CERTIFICATE-----
37
+
38
+ date: 2010-03-25 00:00:00 -04:00
39
+ default_executable:
40
+ dependencies: []
41
+
42
+ description: concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects.
43
+ email: aunger @nospam@ concord.org
44
+ executables: []
45
+
46
+ extensions: []
47
+
48
+ extra_rdoc_files:
49
+ - README.textile
50
+ - lib/concord_cacher.rb
51
+ - lib/concord/cacher.rb
52
+ - lib/concord/diy_local_cacher.rb
53
+ - lib/concord/java_proxy_cacher.rb
54
+ files:
55
+ - README.textile
56
+ - Rakefile
57
+ - lib/concord_cacher.rb
58
+ - lib/concord/cacher.rb
59
+ - lib/concord/diy_local_cacher.rb
60
+ - lib/concord/java_proxy_cacher.rb
61
+ - spec/data/element_reference.otml
62
+ - spec/data/empty.otml
63
+ - spec/data/recursion.otml
64
+ - spec/data/resources/chart_line.png
65
+ - spec/data/resources/delete.png
66
+ - spec/data/resources/recurse1.otml
67
+ - spec/data/resources/recurse2.otml
68
+ - spec/data/resources/text.txt
69
+ - spec/data/standard_uri.otml
70
+ - spec/diy_local_cacher_spec.rb
71
+ - spec/helpers/cache_helper.rb
72
+ - spec/java_proxy_cacher_spec.rb
73
+ - Manifest
74
+ - concord_cacher.gemspec
75
+ has_rdoc: true
76
+ homepage: http://github.com/psndcsrv/concord_cacher
77
+ licenses: []
78
+
79
+ post_install_message:
80
+ rdoc_options:
81
+ - --line-numbers
82
+ - --inline-source
83
+ - --title
84
+ - Concord_cacher
85
+ - --main
86
+ - README.textile
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ segments:
94
+ - 0
95
+ version: "0"
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ segments:
101
+ - 1
102
+ - 2
103
+ version: "1.2"
104
+ requirements: []
105
+
106
+ rubyforge_project: concord_cacher
107
+ rubygems_version: 1.3.6
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: Support for locally caching a resource and all referenced resources in multiple different ways
111
+ test_files: []
112
+
metadata.gz.sig ADDED
Binary file