RubyGems - concord_cacher - Versions diffs - 0.1.9 → 0.1.10 - Mend

concord_cacher 0.1.9 → 0.1.10

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10) hide show

data/Rakefile +2 -1
data/concord_cacher.gemspec +1 -1
data/lib/concord/resource.rb +23 -13
data/spec/data/always_skip.otml +1 -0
data/spec/data/xml_entities.otml +2 -1
data/spec/diy_local_cacher_spec.rb +1 -0
data/spec/java_proxy_cacher_spec.rb +32 -2
data.tar.gz.sig +0 -0
metadata +2 -2
metadata.gz.sig +0 -0

data/Rakefile CHANGED Viewed

@@ -6,7 +6,7 @@ require 'spec/rake/spectask'
 require './lib/concord_cacher.rb'
 require 'echoe'
-Echoe.new('concord_cacher', '0.1.9') do |p|
+Echoe.new('concord_cacher', '0.1.10') do |p|
   p.description    = "concord_cacher provides support for locally caching a resource and all referenced resources in multiple different ways. It is intended for using with other Concord Consortium projects and not necessarily for outside projects."
   p.summary        = "Support for locally caching a resource and all referenced resources in multiple different ways"
   p.url            = "http://github.com/psndcsrv/concord_cacher"
@@ -19,6 +19,7 @@ Echoe.new('concord_cacher', '0.1.9') do |p|
 end
 task :default => :spec
 Spec::Rake::SpecTask.new do |t|
   t.spec_files = FileList["spec/**/*_spec.rb"]
 end

data/concord_cacher.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = %q{concord_cacher}
-  s.version = "0.1.9"
+  s.version = "0.1.10"
   s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
   s.authors = ["Aaron Unger"]

data/lib/concord/resource.rb CHANGED Viewed

@@ -29,6 +29,7 @@ class ::Concord::Resource
   ALWAYS_SKIP_REGEXES = []
   ALWAYS_SKIP_REGEXES << Regexp.new(/^(mailto|jres)/i)
   ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/.*?w3\.org\//i)
+  ALWAYS_SKIP_REGEXES << Regexp.new(/http[s]?:\/\/xml.apache.org\/xslt/i)
   RECURSE_ONCE_REGEX = /html$/i
   RECURSE_FOREVER_REGEX = /(otml|cml|mml|nlogo)$/i
@@ -69,6 +70,10 @@ class ::Concord::Resource
     @url_map[k] = v
   end
+  def self.unmap(k)
+    @url_map.delete(k)
+  end
   def self.error(u,str)
     @errors[u] ||= []
     @errors[u] << str
@@ -88,7 +93,7 @@ class ::Concord::Resource
       f.flush
     end
     write_property_map(self.cache_dir + self.local_filename + ".hdrs", self.headers) if self.class.cache_headers
-    ::Concord::Resource.map(self.uri_str, self.local_filename) if self.class.create_map
   end
   # Reserving the file will prohibit any further references to this same file to be skipped, this avoiding endlessly recursing references
@@ -98,6 +103,7 @@ class ::Concord::Resource
   def release
     FileUtils.rm(self.cache_dir + @local_filename) if @local_filename
+    ::Concord::Resource.unmap(self.uri_str) if self.class.create_map
   end
   def exists?
@@ -109,8 +115,9 @@ class ::Concord::Resource
       self.headers = r.respond_to?("meta") ? r.meta : {}
       self.headers['_http_version'] = "HTTP/1.1 #{r.respond_to?("status") ? r.status.join(" ") : "200 OK"}"
       self.content = r.read
-      self.remove_codebase if self.class.rewrite_urls
     end
+    self.remove_codebase if self.class.rewrite_urls
+    ::Concord::Resource.map(self.uri_str, self.local_filename) if self.class.create_map
   end
   def has_codebase?
@@ -125,13 +132,14 @@ class ::Concord::Resource
   def process
     print "\n#{self.remote_filename}: " if self.class.verbose
     processed_lines = []
+    ending_newlines = self.content[/([\n]+)$/m, 1]
     lines = self.content.split("\n")
     lines.each do |line|
       processed_lines << _process_line(line)
     end
     print ".\n" if self.class.verbose
-    self.content = processed_lines.join("\n")
+    self.content = processed_lines.join("\n") + (ending_newlines || '')
   end
   def uri_str
@@ -167,31 +175,31 @@ class ::Concord::Resource
   private
   def _line_matches(line)
-    return ( URL_REGEX.match(line) ||
-             SRC_REGEX.match(line) ||
-             _line_matches_by_file(line)
-      )
+    urls = line.scan(URL_REGEX)
+    urls += line.scan(SRC_REGEX)
+    urls += _line_matches_by_file(line)
+    return urls.flatten.compact.uniq || []
   end
   def _line_matches_by_file(line)
     reg = FILE_SPECIFIC_REGEXES.detect{|r,v| r.match(self.remote_filename)}
     # reg[0] is the file regex, reg[1] is an array of regexes for that file type
     if reg
-      return reg[1].map{|r2| r2.match(line) }.compact.first
+      return reg[1].map{|r2| line.scan(r2) }
     else
-      return nil
+      return []
     end
   end
   def _process_line(line)
     orig_line = line
     line = CGI.unescapeHTML(line)
-    match_indexes = []
-    while ( match = _line_matches(line) ) && (! match_indexes.include?(match.begin(1)))
+    matches = _line_matches(line)
+    matches.each do |match|
       print "\nMatched url: #{match[1]}: " if self.class.debug
-      match_indexes << match.begin(1)
       resource = Concord::Resource.new
-      resource.url = match[1]
+      resource.url = match
       resource.cache_dir = self.cache_dir
       catch :nextResource do
         _handle_resource(resource)
@@ -262,6 +270,8 @@ class ::Concord::Resource
       print 'X' if self.class.verbose
       resource.release
       throw :nextResource
+    rescue Timeout::Error
+      retry
     end
   end
 end

data/spec/data/always_skip.otml CHANGED Viewed

@@ -9,6 +9,7 @@
           <!-- skip w3.org urls -->
           http://www.w3.org/1999/xhtml
           http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
+          http://xml.apache.org/xslt
         </OTText>
       </root>
     </OTSystem>

data/spec/data/xml_entities.otml CHANGED Viewed

@@ -9,4 +9,5 @@
       </root>
     </OTSystem>
   </objects>
-</otrunk>
+</otrunk>

data/spec/diy_local_cacher_spec.rb CHANGED Viewed

@@ -311,6 +311,7 @@ describe 'DIY Local Cacher' do
       expected_filename = 'hash.otml'
       cache('always_skip.otml', :activity => mockup('always_skip.otml'))
       cache_size.should == 1
+      ::Concord::Resource.errors.size.should == 0
     end
   end

data/spec/java_proxy_cacher_spec.rb CHANGED Viewed

@@ -211,7 +211,7 @@ describe 'Java Proxy Cacher' do
   describe 'special cases' do
     it 'should not unencode xml entities that are not part of a url' do
       expected_files = []
-      expected_files << "b3dd880c1368ff9ed70cba3df3b81cd04bf77bdf" # xml_entities.otml
+      expected_files << "40f8f0e37503367ea32732b9a357722b6a750d0e" # xml_entities.otml
       expected_files << 'd1cea238486aeeba9215d56bf71efc243754fe48' # resources/chart_line.png
       expected_files << expected_files.collect{|f| f+".hdrs" } # headers for each file
       expected_files.flatten!
@@ -224,11 +224,27 @@ describe 'Java Proxy Cacher' do
         exists?(f)
       end
-      file_content = File.read(File.join(@cache,'b3dd880c1368ff9ed70cba3df3b81cd04bf77bdf'))
+      file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
       file_content.should match(Regexp.new('<OTText text="&lt;img src=&quot;http://portal.concord.org/images/icons/chart_line.png&quot; /&gt;" />'))
     end
+    it 'should maintain newlines occurring at the end of the file' do
+      cache('xml_entities.otml')
+      file_content = File.read(File.join(@cache,'40f8f0e37503367ea32732b9a357722b6a750d0e'))
+      file_content.should match(/\n\n$/m)
+    end
+    it 'should find a src= reference when there is an absolute url on the same line' do
+      cache('flash_file.otml')
+      cache_size.should == 13
+      exists?('2e867d0a681370b8debb0a7981915c0f8f6de33b') # radishes.html
+      exists?('e04e4e2fdfb39c5b8776fa365bd9ac4fdb3851d5') # radishes.swf
+    end
   end
   describe 'url map' do
@@ -251,5 +267,19 @@ describe 'Java Proxy Cacher' do
         url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
       end
     end
+    it 'should list both urls when the content is the same' do
+      expected_entries = []
+      expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_positive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
+      expected_entries << {:key => 'http://udl.concord.org/artwork/elect_34/red_postive_charge/el_34_red_positive_charge.png', :val => '0cb63d1b4b57af2b8fa671854caa707da5390a80'}
+      cache('same_content.otml')
+      url_map_content = File.read(File.expand_path('url_map.xml', @cache))
+      expected_entries.each do |e|
+        url_map_content.should match(Regexp.new("<entry key='#{e[:key]}'>#{e[:val]}</entry>"))
+      end
+    end
   end
 end

data.tar.gz.sig CHANGED Viewed

Binary file

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 1
-  - 9
-  version: 0.1.9
+  - 10
+  version: 0.1.10
 platform: ruby
 authors:
 - Aaron Unger

metadata.gz.sig CHANGED Viewed

Binary file