concord_cacher 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -1
- data/concord_cacher.gemspec +1 -1
- data/lib/concord/resource.rb +23 -13
- data/spec/data/always_skip.otml +1 -0
- data/spec/data/xml_entities.otml +2 -1
- data/spec/diy_local_cacher_spec.rb +1 -0
- data/spec/java_proxy_cacher_spec.rb +32 -2
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ require 'spec/rake/spectask'
|
|
6
6
|
require './lib/concord_cacher.rb'
|
7
7
|
|
8
8
|
require 'echoe'
|
9
|
-
Echoe.new('concord_cacher', '0.1.9') do |p|
|
9
|
+
Echoe.new('concord_cacher', '0.1.10') do |p|
|
10
10
|
p.description = "concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects."
|
11
11
|
p.summary = "Support for locally caching a resource and all referenced resources in multiple different ways"
|
12
12
|
p.url = "http://github.com/psndcsrv/concord_cacher"
|
@@ -19,6 +19,7 @@ Echoe.new('concord_cacher', '0.1.9') do |p|
|
|
19
19
|
end
|
20
20
|
|
21
21
|
task :default => :spec
|
22
|
+
|
22
23
|
Spec::Rake::SpecTask.new do |t|
|
23
24
|
t.spec_files = FileList["spec/**/*_spec.rb"]
|
24
25
|
end
|
data/concord_cacher.gemspec
CHANGED
data/lib/concord/resource.rb
CHANGED
@@ -29,6 +29,7 @@ class ::Concord::Resource
|
|
29
29
|
ALWAYS_SKIP_REGEXES = []
|
30
30
|
ALWAYS_SKIP_REGEXES << Regexp.new(/^(mailto|jres)/i)
|
31
31
|
ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/.*?w3\.org\//i)
|
32
|
+
ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/xml.apache.org\/xslt/i)
|
32
33
|
|
33
34
|
RECURSE_ONCE_REGEX = /html$/i
|
34
35
|
RECURSE_FOREVER_REGEX = /(otml|cml|mml|nlogo)$/i
|
@@ -69,6 +70,10 @@ class ::Concord::Resource
|
|
69
70
|
@url_map[k] = v
|
70
71
|
end
|
71
72
|
|
73
|
+
def self.unmap(k)
|
74
|
+
@url_map.delete(k)
|
75
|
+
end
|
76
|
+
|
72
77
|
def self.error(u,str)
|
73
78
|
@errors[u] ||= []
|
74
79
|
@errors[u] << str
|
@@ -88,7 +93,7 @@ class ::Concord::Resource
|
|
88
93
|
f.flush
|
89
94
|
end
|
90
95
|
write_property_map(self.cache_dir + self.local_filename + ".hdrs", self.headers) if self.class.cache_headers
|
91
|
-
|
96
|
+
|
92
97
|
end
|
93
98
|
|
94
99
|
# Reserving the file causes any further references to this same file to be skipped, thus avoiding endlessly recursing references
|
@@ -98,6 +103,7 @@ class ::Concord::Resource
|
|
98
103
|
|
99
104
|
def release
|
100
105
|
FileUtils.rm(self.cache_dir + @local_filename) if @local_filename
|
106
|
+
::Concord::Resource.unmap(self.uri_str) if self.class.create_map
|
101
107
|
end
|
102
108
|
|
103
109
|
def exists?
|
@@ -109,8 +115,9 @@ class ::Concord::Resource
|
|
109
115
|
self.headers = r.respond_to?("meta") ? r.meta : {}
|
110
116
|
self.headers['_http_version'] = "HTTP/1.1 #{r.respond_to?("status") ? r.status.join(" ") : "200 OK"}"
|
111
117
|
self.content = r.read
|
112
|
-
self.remove_codebase if self.class.rewrite_urls
|
113
118
|
end
|
119
|
+
self.remove_codebase if self.class.rewrite_urls
|
120
|
+
::Concord::Resource.map(self.uri_str, self.local_filename) if self.class.create_map
|
114
121
|
end
|
115
122
|
|
116
123
|
def has_codebase?
|
@@ -125,13 +132,14 @@ class ::Concord::Resource
|
|
125
132
|
def process
|
126
133
|
print "\n#{self.remote_filename}: " if self.class.verbose
|
127
134
|
processed_lines = []
|
135
|
+
ending_newlines = self.content[/([\n]+)$/m, 1]
|
128
136
|
lines = self.content.split("\n")
|
129
137
|
lines.each do |line|
|
130
138
|
processed_lines << _process_line(line)
|
131
139
|
end
|
132
140
|
|
133
141
|
print ".\n" if self.class.verbose
|
134
|
-
self.content = processed_lines.join("\n")
|
142
|
+
self.content = processed_lines.join("\n") + (ending_newlines || '')
|
135
143
|
end
|
136
144
|
|
137
145
|
def uri_str
|
@@ -167,31 +175,31 @@ class ::Concord::Resource
|
|
167
175
|
private
|
168
176
|
|
169
177
|
def _line_matches(line)
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
178
|
+
urls = line.scan(URL_REGEX)
|
179
|
+
urls += line.scan(SRC_REGEX)
|
180
|
+
urls += _line_matches_by_file(line)
|
181
|
+
|
182
|
+
return urls.flatten.compact.uniq || []
|
174
183
|
end
|
175
184
|
|
176
185
|
def _line_matches_by_file(line)
|
177
186
|
reg = FILE_SPECIFIC_REGEXES.detect{|r,v| r.match(self.remote_filename)}
|
178
187
|
# reg[0] is the file regex, reg[1] is an array of regexes for that file type
|
179
188
|
if reg
|
180
|
-
return reg[1].map{|r2|
|
189
|
+
return reg[1].map{|r2| line.scan(r2) }
|
181
190
|
else
|
182
|
-
return
|
191
|
+
return []
|
183
192
|
end
|
184
193
|
end
|
185
194
|
|
186
195
|
def _process_line(line)
|
187
196
|
orig_line = line
|
188
197
|
line = CGI.unescapeHTML(line)
|
189
|
-
|
190
|
-
|
198
|
+
matches = _line_matches(line)
|
199
|
+
matches.each do |match|
|
191
200
|
print "\nMatched url: #{match[1]}: " if self.class.debug
|
192
|
-
match_indexes << match.begin(1)
|
193
201
|
resource = Concord::Resource.new
|
194
|
-
resource.url = match
|
202
|
+
resource.url = match
|
195
203
|
resource.cache_dir = self.cache_dir
|
196
204
|
catch :nextResource do
|
197
205
|
_handle_resource(resource)
|
@@ -262,6 +270,8 @@ class ::Concord::Resource
|
|
262
270
|
print 'X' if self.class.verbose
|
263
271
|
resource.release
|
264
272
|
throw :nextResource
|
273
|
+
rescue Timeout::Error
|
274
|
+
retry
|
265
275
|
end
|
266
276
|
end
|
267
277
|
end
|
data/spec/data/always_skip.otml
CHANGED
data/spec/data/xml_entities.otml
CHANGED
@@ -211,7 +211,7 @@ describe 'Java Proxy Cacher' do
|
|
211
211
|
describe 'special cases' do
|
212
212
|
it 'should not unencode xml entities that are not part of a url' do
|
213
213
|
expected_files = []
|
214
|
-
expected_files << "
|
214
|
+
expected_files << "40f8f0e37503367ea32732b9a357722b6a750d0e" # xml_entities.otml
|
215
215
|
expected_files << 'd1cea238486aeeba9215d56bf71efc243754fe48' # resources/chart_line.png
|
216
216
|
expected_files << expected_files.collect{|f| f+".hdrs" } # headers for each file
|
217
217
|
expected_files.flatten!
|
@@ -224,11 +224,27 @@ describe 'Java Proxy Cacher' do
|
|
224
224
|
exists?(f)
|
225
225
|
end
|
226
226
|
|
227
|
-
file_content = File.read(File.join(@cache,'
|
227
|
+
file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
|
228
228
|
|
229
229
|
file_content.should match(Regexp.new('<OTText text="<img src="http://portal.concord.org/images/icons/chart_line.png" />" />'))
|
230
230
|
end
|
231
231
|
|
232
|
+
it 'should maintain newlines occurring at the end of the file' do
|
233
|
+
cache('xml_entities.otml')
|
234
|
+
|
235
|
+
file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
|
236
|
+
|
237
|
+
file_content.should match(/\n\n$/m)
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'should find a src= reference when there is an absolute url on the same line' do
|
241
|
+
cache('flash_file.otml')
|
242
|
+
|
243
|
+
cache_size.should == 13
|
244
|
+
|
245
|
+
exists?('2e867d0a681370b8debb0a7981915c0f8f6de33b') # radishes.html
|
246
|
+
exists?('e04e4e2fdfb39c5b8776fa365bd9ac4fdb3851d5') # radishes.swf
|
247
|
+
end
|
232
248
|
end
|
233
249
|
|
234
250
|
describe 'url map' do
|
@@ -251,5 +267,19 @@ describe 'Java Proxy Cacher' do
|
|
251
267
|
url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
|
252
268
|
end
|
253
269
|
end
|
270
|
+
|
271
|
+
it 'should list both urls when the content is the same' do
|
272
|
+
expected_entries = []
|
273
|
+
expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_positive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
|
274
|
+
expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_postive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
|
275
|
+
|
276
|
+
cache('same_content.otml')
|
277
|
+
|
278
|
+
url_map_content = File.read(File.expand_path('url_map.xml', @cache))
|
279
|
+
|
280
|
+
expected_entries.each do |e|
|
281
|
+
url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
|
282
|
+
end
|
283
|
+
end
|
254
284
|
end
|
255
285
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
Binary file
|