concord_cacher 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ require 'spec/rake/spectask'
6
6
  require './lib/concord_cacher.rb'
7
7
 
8
8
  require 'echoe'
9
- Echoe.new('concord_cacher', '0.1.9') do |p|
9
+ Echoe.new('concord_cacher', '0.1.10') do |p|
10
10
  p.description = "concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects."
11
11
  p.summary = "Support for locally caching a resource and all referenced resources in multiple different ways"
12
12
  p.url = "http://github.com/psndcsrv/concord_cacher"
@@ -19,6 +19,7 @@ Echoe.new('concord_cacher', '0.1.9') do |p|
19
19
  end
20
20
 
21
21
  task :default => :spec
22
+
22
23
  Spec::Rake::SpecTask.new do |t|
23
24
  t.spec_files = FileList["spec/**/*_spec.rb"]
24
25
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{concord_cacher}
5
- s.version = "0.1.9"
5
+ s.version = "0.1.10"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Aaron Unger"]
@@ -29,6 +29,7 @@ class ::Concord::Resource
29
29
  ALWAYS_SKIP_REGEXES = []
30
30
  ALWAYS_SKIP_REGEXES << Regexp.new(/^(mailto|jres)/i)
31
31
  ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/.*?w3\.org\//i)
32
+ ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/xml.apache.org\/xslt/i)
32
33
 
33
34
  RECURSE_ONCE_REGEX = /html$/i
34
35
  RECURSE_FOREVER_REGEX = /(otml|cml|mml|nlogo)$/i
@@ -69,6 +70,10 @@ class ::Concord::Resource
69
70
  @url_map[k] = v
70
71
  end
71
72
 
73
+ def self.unmap(k)
74
+ @url_map.delete(k)
75
+ end
76
+
72
77
  def self.error(u,str)
73
78
  @errors[u] ||= []
74
79
  @errors[u] << str
@@ -88,7 +93,7 @@ class ::Concord::Resource
88
93
  f.flush
89
94
  end
90
95
  write_property_map(self.cache_dir + self.local_filename + ".hdrs", self.headers) if self.class.cache_headers
91
- ::Concord::Resource.map(self.uri_str, self.local_filename) if self.class.create_map
96
+
92
97
  end
93
98
 
94
99
  # Reserving the file will cause any further references to this same file to be skipped, thus avoiding endlessly recursing references
@@ -98,6 +103,7 @@ class ::Concord::Resource
98
103
 
99
104
  def release
100
105
  FileUtils.rm(self.cache_dir + @local_filename) if @local_filename
106
+ ::Concord::Resource.unmap(self.uri_str) if self.class.create_map
101
107
  end
102
108
 
103
109
  def exists?
@@ -109,8 +115,9 @@ class ::Concord::Resource
109
115
  self.headers = r.respond_to?("meta") ? r.meta : {}
110
116
  self.headers['_http_version'] = "HTTP/1.1 #{r.respond_to?("status") ? r.status.join(" ") : "200 OK"}"
111
117
  self.content = r.read
112
- self.remove_codebase if self.class.rewrite_urls
113
118
  end
119
+ self.remove_codebase if self.class.rewrite_urls
120
+ ::Concord::Resource.map(self.uri_str, self.local_filename) if self.class.create_map
114
121
  end
115
122
 
116
123
  def has_codebase?
@@ -125,13 +132,14 @@ class ::Concord::Resource
125
132
  def process
126
133
  print "\n#{self.remote_filename}: " if self.class.verbose
127
134
  processed_lines = []
135
+ ending_newlines = self.content[/([\n]+)$/m, 1]
128
136
  lines = self.content.split("\n")
129
137
  lines.each do |line|
130
138
  processed_lines << _process_line(line)
131
139
  end
132
140
 
133
141
  print ".\n" if self.class.verbose
134
- self.content = processed_lines.join("\n")
142
+ self.content = processed_lines.join("\n") + (ending_newlines || '')
135
143
  end
136
144
 
137
145
  def uri_str
@@ -167,31 +175,31 @@ class ::Concord::Resource
167
175
  private
168
176
 
169
177
  def _line_matches(line)
170
- return ( URL_REGEX.match(line) ||
171
- SRC_REGEX.match(line) ||
172
- _line_matches_by_file(line)
173
- )
178
+ urls = line.scan(URL_REGEX)
179
+ urls += line.scan(SRC_REGEX)
180
+ urls += _line_matches_by_file(line)
181
+
182
+ return urls.flatten.compact.uniq || []
174
183
  end
175
184
 
176
185
  def _line_matches_by_file(line)
177
186
  reg = FILE_SPECIFIC_REGEXES.detect{|r,v| r.match(self.remote_filename)}
178
187
  # reg[0] is the file regex, reg[1] is an array of regexes for that file type
179
188
  if reg
180
- return reg[1].map{|r2| r2.match(line) }.compact.first
189
+ return reg[1].map{|r2| line.scan(r2) }
181
190
  else
182
- return nil
191
+ return []
183
192
  end
184
193
  end
185
194
 
186
195
  def _process_line(line)
187
196
  orig_line = line
188
197
  line = CGI.unescapeHTML(line)
189
- match_indexes = []
190
- while ( match = _line_matches(line) ) && (! match_indexes.include?(match.begin(1)))
198
+ matches = _line_matches(line)
199
+ matches.each do |match|
191
200
  print "\nMatched url: #{match[1]}: " if self.class.debug
192
- match_indexes << match.begin(1)
193
201
  resource = Concord::Resource.new
194
- resource.url = match[1]
202
+ resource.url = match
195
203
  resource.cache_dir = self.cache_dir
196
204
  catch :nextResource do
197
205
  _handle_resource(resource)
@@ -262,6 +270,8 @@ class ::Concord::Resource
262
270
  print 'X' if self.class.verbose
263
271
  resource.release
264
272
  throw :nextResource
273
+ rescue Timeout::Error
274
+ retry
265
275
  end
266
276
  end
267
277
  end
@@ -9,6 +9,7 @@
9
9
  <!-- skip w3.org urls -->
10
10
  http://www.w3.org/1999/xhtml
11
11
  http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
12
+ http://xml.apache.org/xslt
12
13
  </OTText>
13
14
  </root>
14
15
  </OTSystem>
@@ -9,4 +9,5 @@
9
9
  </root>
10
10
  </OTSystem>
11
11
  </objects>
12
- </otrunk>
12
+ </otrunk>
13
+
@@ -311,6 +311,7 @@ describe 'DIY Local Cacher' do
311
311
  expected_filename = 'hash.otml'
312
312
  cache('always_skip.otml', :activity => mockup('always_skip.otml'))
313
313
  cache_size.should == 1
314
+ ::Concord::Resource.errors.size.should == 0
314
315
  end
315
316
  end
316
317
 
@@ -211,7 +211,7 @@ describe 'Java Proxy Cacher' do
211
211
  describe 'special cases' do
212
212
  it 'should not unencode xml entities that are not part of a url' do
213
213
  expected_files = []
214
- expected_files << "b3dd880c1368ff9ed70cba3df3b81cd04bf77bdf" # xml_entities.otml
214
+ expected_files << "40f8f0e37503367ea32732b9a357722b6a750d0e" # xml_entities.otml
215
215
  expected_files << 'd1cea238486aeeba9215d56bf71efc243754fe48' # resources/chart_line.png
216
216
  expected_files << expected_files.collect{|f| f+".hdrs" } # headers for each file
217
217
  expected_files.flatten!
@@ -224,11 +224,27 @@ describe 'Java Proxy Cacher' do
224
224
  exists?(f)
225
225
  end
226
226
 
227
- file_content = File.read(File.join(@cache,'b3dd880c1368ff9ed70cba3df3b81cd04bf77bdf'))
227
+ file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
228
228
 
229
229
  file_content.should match(Regexp.new('<OTText text="&lt;img src=&quot;http://portal.concord.org/images/icons/chart_line.png&quot; /&gt;" />'))
230
230
  end
231
231
 
232
+ it 'should maintain newlines occurring at the end of the file' do
233
+ cache('xml_entities.otml')
234
+
235
+ file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
236
+
237
+ file_content.should match(/\n\n$/m)
238
+ end
239
+
240
+ it 'should find a src= reference when there is an absolute url on the same line' do
241
+ cache('flash_file.otml')
242
+
243
+ cache_size.should == 13
244
+
245
+ exists?('2e867d0a681370b8debb0a7981915c0f8f6de33b') # radishes.html
246
+ exists?('e04e4e2fdfb39c5b8776fa365bd9ac4fdb3851d5') # radishes.swf
247
+ end
232
248
  end
233
249
 
234
250
  describe 'url map' do
@@ -251,5 +267,19 @@ describe 'Java Proxy Cacher' do
251
267
  url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
252
268
  end
253
269
  end
270
+
271
+ it 'should list both urls when the content is the same' do
272
+ expected_entries = []
273
+ expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_positive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
274
+ expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_postive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
275
+
276
+ cache('same_content.otml')
277
+
278
+ url_map_content = File.read(File.expand_path('url_map.xml', @cache))
279
+
280
+ expected_entries.each do |e|
281
+ url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
282
+ end
283
+ end
254
284
  end
255
285
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 9
9
- version: 0.1.9
8
+ - 10
9
+ version: 0.1.10
10
10
  platform: ruby
11
11
  authors:
12
12
  - Aaron Unger
metadata.gz.sig CHANGED
Binary file