RubyGems - datacatalog-importer - Versions diffs - 0.1.13 → 0.1.14 - Mend

datacatalog-importer 0.1.13 → 0.1.14

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10) hide show

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.13
1	+ 0.1.14

data/datacatalog-importer.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{datacatalog-importer}
-  s.version = "0.1.13"
+  s.version = "0.1.14"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["David James"]
-  s.date = %q{2010-04-30}
+  s.date = %q{2010-05-04}
   s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
   s.email = %q{djames@sunlightfoundation.com}
   s.extra_rdoc_files = [

data/lib/importer.rb CHANGED Viewed

@@ -1,7 +1,5 @@
 module DataCatalog
   module ImporterFramework
     class Error < RuntimeError; end
   end
 end

data/lib/puller.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require File.dirname(__FILE__) + '/shared'
 module DataCatalog
   module ImporterFramework
     class Puller
-      include DataCatalog::ImporterFramework::Shared
+      include Shared
       REQUIRED = %w(cache_folder pullers)

data/lib/pusher.rb CHANGED Viewed

@@ -6,7 +6,7 @@ require File.dirname(__FILE__) + '/shared'
 module DataCatalog
   module ImporterFramework
     class Pusher
-      include DataCatalog::ImporterFramework::Shared
+      include Shared
       REQUIRED = %w(api_key base_uri cache_folder)

data/lib/shared.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 module DataCatalog
   module ImporterFramework
     module Shared
       def folder(resource)
         unless @options
           raise Error, "@options is undefined"
@@ -11,7 +10,6 @@ module DataCatalog
         end
         File.join(@options[:cache_folder], resource.to_s)
       end
     end
   end
 end

data/lib/sort_yaml_hash.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 require 'yaml'
 class Hash
   def to_yaml(opts = {})
     YAML::quick_emit(object_id, opts) do |out|
       out.map(taguri, to_yaml_style) do |map|
@@ -10,13 +9,10 @@ class Hash
       end
     end
   end
 end
 class Symbol
   def <=>(other)
     self.to_s <=> other.to_s
   end
 end

data/lib/tasks.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module DataCatalog
         desc "Pull data from the #{options[:name]}"
         task :pull do
           puts "Pulling data from the #{options[:name]}..."
-          puller = DataCatalog::ImporterFramework::Puller.new({
+          puller = Puller.new({
             :cache_folder => options[:cache_folder],
             :pullers      => options[:pullers],
           })
@@ -22,7 +22,7 @@ module DataCatalog
         desc "Push data to the Data Catalog API"
         task :push do
           desc "Pushing data to the Data Catalog API..."
-          pusher = DataCatalog::ImporterFramework::Pusher.new({
+          pusher = Pusher.new({
             :api_key      => options[:api_key],
             :base_uri     => options[:base_uri],
             :cache_folder => options[:cache_folder],

data/lib/utility.rb CHANGED Viewed

@@ -3,150 +3,160 @@ require 'nokogiri'
 require 'open-uri'
 module DataCatalog
-  class Utility
+  module ImporterFramework
+    class Utility
-    # == URLs ==
+      # == URLs ==
-    def self.absolute_url(page_url, url)
-      Utility.plain_string(URI.parse(page_url).merge(url).to_s)
-    end
+      def self.absolute_url(page_url, url)
+        Utility.plain_string(URI.parse(page_url).merge(url).to_s)
+      end
-    # == Cleaning ==
+      # == Cleaning ==
-    def self.single_line_clean(s)
-      plain_string(
-        s.gsub(/[\r\n\t]/, " ").gsub(/[\x80-\xFF]/, "").squeeze(" ").strip)
-    end
+      def self.single_line_clean(s)
+        plain_string(
+          s.gsub(/[\r\n\t]/, " ").gsub(/[\x80-\xFF]/, "").squeeze(" ").strip)
+      end
-    def self.multi_line_clean(s)
-      plain_string(
-        s.gsub(/[\x80-\xFF]/, "").squeeze(" ").strip)
-    end
+      def self.multi_line_clean(s)
+        plain_string(
+          s.gsub(/[\x80-\xFF]/, "").squeeze(" ").strip)
+      end
-    # ActiveSupport 2.3.5 adds @_rails_html_safe aggressively.
-    # This method removes it so you can output clean YAML.
-    def self.plain_string(s)
-      if s.instance_variable_defined?(:@_rails_html_safe)
-        s.send(:remove_instance_variable, :@_rails_html_safe)
+      # ActiveSupport 2.3.5 adds @_rails_html_safe aggressively.
+      # This method removes it so you can output clean YAML.
+      def self.plain_string(s)
+        if s.instance_variable_defined?(:@_rails_html_safe)
+          s.send(:remove_instance_variable, :@_rails_html_safe)
+        end
+        s
       end
-      s
-    end
-    # == API ===
+      # == API ===
-    def self.setup_api(api_key, base_uri)
-      DataCatalog.api_key  = api_key
-      DataCatalog.base_uri = base_uri
-    end
+      def self.setup_api(api_key, base_uri)
+        DataCatalog.api_key  = api_key
+        DataCatalog.base_uri = base_uri
+      end
-    def self.headers
-      {
-        "UserAgent" => "National Data Catalog Importer/0.1.6",
-      }
-    end
+      def self.headers
+        {
+          "UserAgent" => "National Data Catalog Importer/0.1.6",
+        }
+      end
-    # == Various ==
-    def self.fetch(uri)
-      puts "Fetching #{uri}..."
-      io = open(uri, headers)
-      io.read
-    end
+      # == Various ==
+      def self.fetch(uri, max_attempts=3)
+        attempts = 0
+        loop do
+          begin
+            puts "Fetching #{uri}..."
+            io = open(uri, headers)
+            return io.read
+          rescue SocketError
+            attempts += 1
+            puts "  Attempt ##{attempts} failed."
+            break if attempts >= max_attempts
+          end
+        end
+      end
-    def self.report_timing(label)
-      puts "Starting: [#{label}]"
-      t0 = Time.now
-      result = yield
-      t1 = Time.now
-      diff = t1 - t0
-      puts "Elapsed time [#{label}] %.2f s" % diff
-      result
-    end
+      def self.report_timing(label)
+        puts "Starting: [#{label}]"
+        t0 = Time.now
+        result = yield
+        t1 = Time.now
+        diff = t1 - t0
+        puts "Elapsed time [#{label}] %.2f s" % diff
+        result
+      end
-    # == CSV ==
-    # { :headers => true } is a common option
-    def self.parse_csv_from_file(filename, options={})
-      extra_header_rows = options.delete(:extra_header_rows) || 0
-      File.open(filename) do |f|
-        extra_header_rows.times { f.gets } # ignore these rows
-        FasterCSV.parse(f, options)
+      # == CSV ==
+      # { :headers => true } is a common option
+      def self.parse_csv_from_file(filename, options={})
+        extra_header_rows = options.delete(:extra_header_rows) || 0
+        File.open(filename) do |f|
+          extra_header_rows.times { f.gets } # ignore these rows
+          FasterCSV.parse(f, options)
+        end
       end
-    end
-    def self.parse_csv_from_uri(uri, options={})
-      puts "Fetching #{uri}..."
-      data = open(uri, headers)
-      puts data.inspect
-      FasterCSV.parse(data, options)
-    end
+      def self.parse_csv_from_uri(uri, options={})
+        puts "Fetching #{uri}..."
+        data = open(uri, headers)
+        puts data.inspect
+        FasterCSV.parse(data, options)
+      end
-    def self.parse_csv_from_file_or_uri(uri, file, options={})
-      force_fetch = options.delete(:force_fetch) || false
-      if force_fetch || !File.exist?(file)
-        document = fetch(uri)
-        File.open(file, "w") { |f| f.write(document) }
+      def self.parse_csv_from_file_or_uri(uri, file, options={})
+        force_fetch = options.delete(:force_fetch) || false
+        if force_fetch || !File.exist?(file)
+          document = fetch(uri)
+          File.open(file, "w") { |f| f.write(document) }
+        end
+        parse_csv_from_file(file, options)
       end
-      parse_csv_from_file(file, options)
-    end
-    # == HTML ==
+      # == HTML ==
-    def self.parse_html_from_file(filename)
-      File.open(filename) do |f|
-        Nokogiri::HTML::Document.parse(f)
+      def self.parse_html_from_file(filename)
+        File.open(filename) do |f|
+          Nokogiri::HTML::Document.parse(f)
+        end
       end
-    end
-    def self.parse_html_from_uri(uri)
-      puts "Fetching #{uri}..."
-      open(uri, headers) do |io|
-        Nokogiri::HTML::Document.parse(io)
+      def self.parse_html_from_uri(uri)
+        puts "Fetching #{uri}..."
+        open(uri, headers) do |io|
+          Nokogiri::HTML::Document.parse(io)
+        end
       end
-    end
-    def self.parse_html_from_file_or_uri(uri, file, options={})
-      if options[:force_fetch] || !File.exist?(file)
-        document = parse_html_from_uri(uri)
-        File.open(file, "w") { |f| f.write(document) }
+      def self.parse_html_from_file_or_uri(uri, file, options={})
+        if options[:force_fetch] || !File.exist?(file)
+          document = parse_html_from_uri(uri)
+          File.open(file, "w") { |f| f.write(document) }
+        end
+        parse_html_from_file(file) # Why always parse the file? See Note 001, below.
       end
-      parse_html_from_file(file) # Why always parse the file? See Note 001, below.
-    end
-    # == XML
+      # == XML
-    def self.parse_xml_from_file(filename)
-      File.open(filename) do |f|
-        Nokogiri::XML::Document.parse(f)
+      def self.parse_xml_from_file(filename)
+        File.open(filename) do |f|
+          Nokogiri::XML::Document.parse(f)
+        end
       end
-    end
-    def self.parse_xml_from_uri(uri)
-    	puts "Fetching #{uri}..."
-    	Nokogiri::XML(open(uri))
-    end
+      def self.parse_xml_from_uri(uri)
+      	puts "Fetching #{uri}..."
+      	Nokogiri::XML(open(uri))
+      end
-    def self.parse_xml_from_file_or_uri(uri, file, options={})
-    	if options[:force_fetch] || !File.exist?(file)
-    		document = parse_xml_from_uri(uri)
-    		File.open(file, "w") { |f| f.write(document) }
-    	end
-    	parse_xml_from_file(file) # Why always parse the file? See Note 001, below.
-    end
+      def self.parse_xml_from_file_or_uri(uri, file, options={})
+      	if options[:force_fetch] || !File.exist?(file)
+      		document = parse_xml_from_uri(uri)
+      		File.open(file, "w") { |f| f.write(document) }
+      	end
+      	parse_xml_from_file(file) # Why always parse the file? See Note 001, below.
+      end
-    # == YAML
+      # == YAML
-    # To load YAML use: YAML::load_file(filename)
+      # To load YAML use: YAML::load_file(filename)
-    def self.write_yaml(filename, contents)
-      File.open(filename, "w") do |f|
-        YAML::dump(contents, f)
+      def self.write_yaml(filename, contents)
+        File.open(filename, "w") do |f|
+          YAML::dump(contents, f)
+        end
       end
     end
   end
 end
 # == Note 001 ==

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 1
-  - 13
-  version: 0.1.13
+  - 14
+  version: 0.1.14
 platform: ruby
 authors:
 - David James
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-04-30 00:00:00 -04:00
+date: 2010-05-04 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency