datacatalog-importer 0.1.19 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
 - data/Rakefile +3 -3
 - data/VERSION +1 -1
 - data/datacatalog-importer.gemspec +16 -11
 - data/example/README.md +5 -0
 - data/example/config.example.yml +3 -0
 - data/example/lib/puller.rb +71 -0
 - data/example/rakefile.rb +22 -0
 - data/lib/datacatalog-importer.rb +1 -0
 - data/lib/handler.rb +33 -0
 - data/lib/puller.rb +5 -46
 - data/lib/shared.rb +2 -0
 - data/lib/tasks.rb +1 -1
 - data/lib/utility.rb +1 -1
 - data/spec/spec_helper.rb +1 -0
 - metadata +26 -10
 
    
        data/.gitignore
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    | 
         @@ -8,10 +8,10 @@ begin 
     | 
|
| 
       8 
8 
     | 
    
         
             
                gem.summary = %Q{A framework to write National Data Catalog importers}
         
     | 
| 
       9 
9 
     | 
    
         
             
                gem.description = %Q{This framework makes it easier to write importers for the National Data Catalog.}
         
     | 
| 
       10 
10 
     | 
    
         
             
                gem.email = "djames@sunlightfoundation.com"
         
     | 
| 
       11 
     | 
    
         
            -
                gem.homepage = "http://github.com/ 
     | 
| 
      
 11 
     | 
    
         
            +
                gem.homepage = "http://github.com/sunlightlabs/datacatalog-importer"
         
     | 
| 
       12 
12 
     | 
    
         
             
                gem.authors = ["David James"]
         
     | 
| 
       13 
     | 
    
         
            -
                gem.add_dependency "nokogiri", ">= 1.4. 
     | 
| 
       14 
     | 
    
         
            -
                gem.add_dependency "datacatalog", ">= 0.4. 
     | 
| 
      
 13 
     | 
    
         
            +
                gem.add_dependency "nokogiri", ">= 1.4.2"
         
     | 
| 
      
 14 
     | 
    
         
            +
                gem.add_dependency "datacatalog", ">= 0.4.15"
         
     | 
| 
       15 
15 
     | 
    
         
             
                gem.add_development_dependency "rspec", ">= 1.2.9"
         
     | 
| 
       16 
16 
     | 
    
         
             
                # gem is a Gem::Specification...
         
     | 
| 
       17 
17 
     | 
    
         
             
                # see http://www.rubygems.org/read/chapter/20 for additional settings
         
     | 
    
        data/VERSION
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            0.1.19
     | 
| 
      
 1 
     | 
    
         
            +
            0.2.0
         
     | 
    
        data/datacatalog-importer.gemspec
    CHANGED
    
    | 
         @@ -5,11 +5,11 @@ 
     | 
|
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       7 
7 
     | 
    
         
             
              s.name = %q{datacatalog-importer}
         
     | 
| 
       8 
     | 
    
         
            -
              s.version = "0.1.19"
     | 
| 
      
 8 
     | 
    
         
            +
              s.version = "0.2.0"
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         
     | 
| 
       11 
11 
     | 
    
         
             
              s.authors = ["David James"]
         
     | 
| 
       12 
     | 
    
         
            -
              s.date = %q{2010- 
     | 
| 
      
 12 
     | 
    
         
            +
              s.date = %q{2010-07-08}
         
     | 
| 
       13 
13 
     | 
    
         
             
              s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
         
     | 
| 
       14 
14 
     | 
    
         
             
              s.email = %q{djames@sunlightfoundation.com}
         
     | 
| 
       15 
15 
     | 
    
         
             
              s.extra_rdoc_files = [
         
     | 
| 
         @@ -24,7 +24,12 @@ Gem::Specification.new do |s| 
     | 
|
| 
       24 
24 
     | 
    
         
             
                 "Rakefile",
         
     | 
| 
       25 
25 
     | 
    
         
             
                 "VERSION",
         
     | 
| 
       26 
26 
     | 
    
         
             
                 "datacatalog-importer.gemspec",
         
     | 
| 
      
 27 
     | 
    
         
            +
                 "example/README.md",
         
     | 
| 
      
 28 
     | 
    
         
            +
                 "example/config.example.yml",
         
     | 
| 
      
 29 
     | 
    
         
            +
                 "example/lib/puller.rb",
         
     | 
| 
      
 30 
     | 
    
         
            +
                 "example/rakefile.rb",
         
     | 
| 
       27 
31 
     | 
    
         
             
                 "lib/datacatalog-importer.rb",
         
     | 
| 
      
 32 
     | 
    
         
            +
                 "lib/handler.rb",
         
     | 
| 
       28 
33 
     | 
    
         
             
                 "lib/importer.rb",
         
     | 
| 
       29 
34 
     | 
    
         
             
                 "lib/puller.rb",
         
     | 
| 
       30 
35 
     | 
    
         
             
                 "lib/pusher.rb",
         
     | 
| 
         @@ -36,10 +41,10 @@ Gem::Specification.new do |s| 
     | 
|
| 
       36 
41 
     | 
    
         
             
                 "spec/spec_helper.rb",
         
     | 
| 
       37 
42 
     | 
    
         
             
                 "spec/utility_spec.rb"
         
     | 
| 
       38 
43 
     | 
    
         
             
              ]
         
     | 
| 
       39 
     | 
    
         
            -
              s.homepage = %q{http://github.com/ 
     | 
| 
      
 44 
     | 
    
         
            +
              s.homepage = %q{http://github.com/sunlightlabs/datacatalog-importer}
         
     | 
| 
       40 
45 
     | 
    
         
             
              s.rdoc_options = ["--charset=UTF-8"]
         
     | 
| 
       41 
46 
     | 
    
         
             
              s.require_paths = ["lib"]
         
     | 
| 
       42 
     | 
    
         
            -
              s.rubygems_version = %q{1.3. 
     | 
| 
      
 47 
     | 
    
         
            +
              s.rubygems_version = %q{1.3.7}
         
     | 
| 
       43 
48 
     | 
    
         
             
              s.summary = %q{A framework to write National Data Catalog importers}
         
     | 
| 
       44 
49 
     | 
    
         
             
              s.test_files = [
         
     | 
| 
       45 
50 
     | 
    
         
             
                "spec/spec_helper.rb",
         
     | 
| 
         @@ -50,18 +55,18 @@ Gem::Specification.new do |s| 
     | 
|
| 
       50 
55 
     | 
    
         
             
                current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
         
     | 
| 
       51 
56 
     | 
    
         
             
                s.specification_version = 3
         
     | 
| 
       52 
57 
     | 
    
         | 
| 
       53 
     | 
    
         
            -
                if Gem::Version.new(Gem:: 
     | 
| 
       54 
     | 
    
         
            -
                  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4. 
     | 
| 
       55 
     | 
    
         
            -
                  s.add_runtime_dependency(%q<datacatalog>, [">= 0.4. 
     | 
| 
      
 58 
     | 
    
         
            +
                if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
         
     | 
| 
      
 59 
     | 
    
         
            +
                  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.2"])
         
     | 
| 
      
 60 
     | 
    
         
            +
                  s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.15"])
         
     | 
| 
       56 
61 
     | 
    
         
             
                  s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
         
     | 
| 
       57 
62 
     | 
    
         
             
                else
         
     | 
| 
       58 
     | 
    
         
            -
                  s.add_dependency(%q<nokogiri>, [">= 1.4. 
     | 
| 
       59 
     | 
    
         
            -
                  s.add_dependency(%q<datacatalog>, [">= 0.4. 
     | 
| 
      
 63 
     | 
    
         
            +
                  s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
         
     | 
| 
      
 64 
     | 
    
         
            +
                  s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
         
     | 
| 
       60 
65 
     | 
    
         
             
                  s.add_dependency(%q<rspec>, [">= 1.2.9"])
         
     | 
| 
       61 
66 
     | 
    
         
             
                end
         
     | 
| 
       62 
67 
     | 
    
         
             
              else
         
     | 
| 
       63 
     | 
    
         
            -
                s.add_dependency(%q<nokogiri>, [">= 1.4. 
     | 
| 
       64 
     | 
    
         
            -
                s.add_dependency(%q<datacatalog>, [">= 0.4. 
     | 
| 
      
 68 
     | 
    
         
            +
                s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
         
     | 
| 
      
 69 
     | 
    
         
            +
                s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
         
     | 
| 
       65 
70 
     | 
    
         
             
                s.add_dependency(%q<rspec>, [">= 1.2.9"])
         
     | 
| 
       66 
71 
     | 
    
         
             
              end
         
     | 
| 
       67 
72 
     | 
    
         
             
            end
         
     | 
    
        data/example/README.md
    ADDED
    
    
    
        data/example/config.example.yml
    ADDED
    
    
    
        data/example/lib/puller.rb
    ADDED
    
| 
         @@ -0,0 +1,71 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Puller
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              ORGS = [
         
     | 
| 
      
 4 
     | 
    
         
            +
                {
         
     | 
| 
      
 5 
     | 
    
         
            +
                  :name        => "Budget Office",
         
     | 
| 
      
 6 
     | 
    
         
            +
                  :url         => "http://example.gov/orgs/budget-office",
         
     | 
| 
      
 7 
     | 
    
         
            +
                  :description => "Prepares the executive budget..."
         
     | 
| 
      
 8 
     | 
    
         
            +
                },
         
     | 
| 
      
 9 
     | 
    
         
            +
                {
         
     | 
| 
      
 10 
     | 
    
         
            +
                  :name        => "Environmental Agency",
         
     | 
| 
      
 11 
     | 
    
         
            +
                  :url         => "http://example.gov/orgs/environmental-agency",
         
     | 
| 
      
 12 
     | 
    
         
            +
                  :description => "Tracks environmental compliance..."
         
     | 
| 
      
 13 
     | 
    
         
            +
                },
         
     | 
| 
      
 14 
     | 
    
         
            +
                {
         
     | 
| 
      
 15 
     | 
    
         
            +
                  :name        => "Inspector General",
         
     | 
| 
      
 16 
     | 
    
         
            +
                  :url         => "http://example.gov/orgs/inspector-general",
         
     | 
| 
      
 17 
     | 
    
         
            +
                  :description => "Inspects..."
         
     | 
| 
      
 18 
     | 
    
         
            +
                }
         
     | 
| 
      
 19 
     | 
    
         
            +
              ]
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              SOURCES = [
         
     | 
| 
      
 22 
     | 
    
         
            +
                {
         
     | 
| 
      
 23 
     | 
    
         
            +
                  :title             => "School District Performance",
         
     | 
| 
      
 24 
     | 
    
         
            +
                  :url               => "http://example.gov/data-sets/209",
         
     | 
| 
      
 25 
     | 
    
         
            +
                  :description       => "Comparative school performance...",
         
     | 
| 
      
 26 
     | 
    
         
            +
                  :frequency         => "annual",
         
     | 
| 
      
 27 
     | 
    
         
            +
                  :source_type       => "dataset",
         
     | 
| 
      
 28 
     | 
    
         
            +
                },
         
     | 
| 
      
 29 
     | 
    
         
            +
                {
         
     | 
| 
      
 30 
     | 
    
         
            +
                  :title             => "Economic Development",
         
     | 
| 
      
 31 
     | 
    
         
            +
                  :url               => "http://example.gov/data-sets/210",
         
     | 
| 
      
 32 
     | 
    
         
            +
                  :description       => "Economic indicators for...",
         
     | 
| 
      
 33 
     | 
    
         
            +
                  :frequency         => "monthly",
         
     | 
| 
      
 34 
     | 
    
         
            +
                  :source_type       => "dataset",
         
     | 
| 
      
 35 
     | 
    
         
            +
                },
         
     | 
| 
      
 36 
     | 
    
         
            +
                {
         
     | 
| 
      
 37 
     | 
    
         
            +
                  :title             => "Superfund Projects",
         
     | 
| 
      
 38 
     | 
    
         
            +
                  :url               => "http://example.gov/apis/5",
         
     | 
| 
      
 39 
     | 
    
         
            +
                  :description       => "API for environmental cleanup...",
         
     | 
| 
      
 40 
     | 
    
         
            +
                  :frequency         => "monthly",
         
     | 
| 
      
 41 
     | 
    
         
            +
                  :source_type       => "api",
         
     | 
| 
      
 42 
     | 
    
         
            +
                },
         
     | 
| 
      
 43 
     | 
    
         
            +
              ]
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
              def initialize(handler)
         
     | 
| 
      
 46 
     | 
    
         
            +
                @handler = handler
         
     | 
| 
      
 47 
     | 
    
         
            +
              end
         
     | 
| 
      
 48 
     | 
    
         
            +
              
         
     | 
| 
      
 49 
     | 
    
         
            +
              def run
         
     | 
| 
      
 50 
     | 
    
         
            +
                common = {
         
     | 
| 
      
 51 
     | 
    
         
            +
                  :catalog_name => "Example Catalog",
         
     | 
| 
      
 52 
     | 
    
         
            +
                  :catalog_url  => "http://example.gov",
         
     | 
| 
      
 53 
     | 
    
         
            +
                }
         
     | 
| 
      
 54 
     | 
    
         
            +
                ORGS.each do |o|
         
     | 
| 
      
 55 
     | 
    
         
            +
                  @handler.organization(
         
     | 
| 
      
 56 
     | 
    
         
            +
                    o.merge(common).merge({
         
     | 
| 
      
 57 
     | 
    
         
            +
                      :org_type     => "governmental",
         
     | 
| 
      
 58 
     | 
    
         
            +
                    })
         
     | 
| 
      
 59 
     | 
    
         
            +
                  )
         
     | 
| 
      
 60 
     | 
    
         
            +
                end
         
     | 
| 
      
 61 
     | 
    
         
            +
                SOURCES.each do |s|
         
     | 
| 
      
 62 
     | 
    
         
            +
                  @handler.source(
         
     | 
| 
      
 63 
     | 
    
         
            +
                    s.merge(common).merge({
         
     | 
| 
      
 64 
     | 
    
         
            +
                      :license      => "public domain",
         
     | 
| 
      
 65 
     | 
    
         
            +
                      :license_url  => "http://example.gov/license",
         
     | 
| 
      
 66 
     | 
    
         
            +
                    })
         
     | 
| 
      
 67 
     | 
    
         
            +
                  )
         
     | 
| 
      
 68 
     | 
    
         
            +
                end
         
     | 
| 
      
 69 
     | 
    
         
            +
              end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
            end
         
     | 
    
        data/example/rakefile.rb
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'rubygems'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'yaml'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/../lib/datacatalog-importer'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/lib/puller'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            def setup
         
     | 
| 
      
 7 
     | 
    
         
            +
              config_file = File.dirname(__FILE__) + '/config.yml'
         
     | 
| 
      
 8 
     | 
    
         
            +
              config = YAML.load_file(config_file)
         
     | 
| 
      
 9 
     | 
    
         
            +
              env = ENV['IMPORTER_ENV']
         
     | 
| 
      
 10 
     | 
    
         
            +
              raise "IMPORTER_ENV undefined" unless env
         
     | 
| 
      
 11 
     | 
    
         
            +
              raise "IMPORTER_ENV invalid" unless config[env]
         
     | 
| 
      
 12 
     | 
    
         
            +
              DataCatalog::ImporterFramework::Tasks.new({
         
     | 
| 
      
 13 
     | 
    
         
            +
                :api_key      => config[env]['api_key'],
         
     | 
| 
      
 14 
     | 
    
         
            +
                :base_uri     => config[env]['base_uri'],
         
     | 
| 
      
 15 
     | 
    
         
            +
                :cache_folder => File.dirname(__FILE__) + '/cache/parsed',
         
     | 
| 
      
 16 
     | 
    
         
            +
                :name         => "Example Catalog",
         
     | 
| 
      
 17 
     | 
    
         
            +
                :uri          => "http://example.datacatalog.gov",
         
     | 
| 
      
 18 
     | 
    
         
            +
                :puller       => Puller,
         
     | 
| 
      
 19 
     | 
    
         
            +
              })
         
     | 
| 
      
 20 
     | 
    
         
            +
            end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            setup
         
     | 
    
        data/lib/datacatalog-importer.rb
    CHANGED
    
    
    
        data/lib/handler.rb
    ADDED
    
    | 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/shared'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module DataCatalog
         
     | 
| 
      
 4 
     | 
    
         
            +
              module ImporterFramework
         
     | 
| 
      
 5 
     | 
    
         
            +
                class Handler
         
     | 
| 
      
 6 
     | 
    
         
            +
                  include Shared
         
     | 
| 
      
 7 
     | 
    
         
            +
                  
         
     | 
| 
      
 8 
     | 
    
         
            +
                  def initialize(options)
         
     | 
| 
      
 9 
     | 
    
         
            +
                    @options = options
         
     | 
| 
      
 10 
     | 
    
         
            +
                    @counter = {}
         
     | 
| 
      
 11 
     | 
    
         
            +
                    [:source, :organization].each do |resource|
         
     | 
| 
      
 12 
     | 
    
         
            +
                      FileUtils.mkdir_p(folder(resource))
         
     | 
| 
      
 13 
     | 
    
         
            +
                      @counter[resource] = 1
         
     | 
| 
      
 14 
     | 
    
         
            +
                    end
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
      
 16 
     | 
    
         
            +
                  
         
     | 
| 
      
 17 
     | 
    
         
            +
                  def source(data)
         
     | 
| 
      
 18 
     | 
    
         
            +
                    write_data(:source, data)
         
     | 
| 
      
 19 
     | 
    
         
            +
                  end
         
     | 
| 
      
 20 
     | 
    
         
            +
                  
         
     | 
| 
      
 21 
     | 
    
         
            +
                  def organization(data)
         
     | 
| 
      
 22 
     | 
    
         
            +
                    write_data(:organization, data)
         
     | 
| 
      
 23 
     | 
    
         
            +
                  end
         
     | 
| 
      
 24 
     | 
    
         
            +
                  
         
     | 
| 
      
 25 
     | 
    
         
            +
                  def write_data(resource, data)
         
     | 
| 
      
 26 
     | 
    
         
            +
                    file = folder(resource) + ("/%08i.yml" % @counter[resource])
         
     | 
| 
      
 27 
     | 
    
         
            +
                    Utility.write_yaml(file, data)
         
     | 
| 
      
 28 
     | 
    
         
            +
                    @counter[resource] += 1
         
     | 
| 
      
 29 
     | 
    
         
            +
                  end
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
              end
         
     | 
| 
      
 33 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/puller.rb
    CHANGED
    
    | 
         @@ -1,64 +1,23 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require File.dirname(__FILE__) + '/shared'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
1 
     | 
    
         
             
            module DataCatalog
         
     | 
| 
       4 
2 
     | 
    
         
             
              module ImporterFramework
         
     | 
| 
       5 
3 
     | 
    
         
             
                class Puller
         
     | 
| 
       6 
     | 
    
         
            -
                  include Shared
         
     | 
| 
       7 
4 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
                  REQUIRED = %w(cache_folder  
     | 
| 
      
 5 
     | 
    
         
            +
                  REQUIRED = %w(cache_folder puller)
         
     | 
| 
       9 
6 
     | 
    
         | 
| 
       10 
7 
     | 
    
         
             
                  def initialize(options)
         
     | 
| 
       11 
8 
     | 
    
         
             
                    REQUIRED.each do |r|
         
     | 
| 
       12 
9 
     | 
    
         
             
                      raise Error, "option :#{r} is required" unless options[r.intern]
         
     | 
| 
       13 
10 
     | 
    
         
             
                    end
         
     | 
| 
       14 
11 
     | 
    
         
             
                    @options = options
         
     | 
| 
       15 
     | 
    
         
            -
                    @counter = {
         
     | 
| 
       16 
     | 
    
         
            -
                      :source       => 1,
         
     | 
| 
       17 
     | 
    
         
            -
                      :organization => 1,
         
     | 
| 
       18 
     | 
    
         
            -
                    }
         
     | 
| 
       19 
12 
     | 
    
         
             
                  end
         
     | 
| 
       20 
13 
     | 
    
         | 
| 
       21 
14 
     | 
    
         
             
                  def run
         
     | 
| 
       22 
     | 
    
         
            -
                    Utility.report_timing "pull 
     | 
| 
       23 
     | 
    
         
            -
                       
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
                      pull_resource(:organization)
         
     | 
| 
      
 15 
     | 
    
         
            +
                    Utility.report_timing "pull" do
         
     | 
| 
      
 16 
     | 
    
         
            +
                      handler = Handler.new(@options)
         
     | 
| 
      
 17 
     | 
    
         
            +
                      puller = @options[:puller].new(handler)
         
     | 
| 
      
 18 
     | 
    
         
            +
                      puller.run
         
     | 
| 
       27 
19 
     | 
    
         
             
                    end
         
     | 
| 
       28 
20 
     | 
    
         
             
                  end
         
     | 
| 
       29 
     | 
    
         
            -
                  
         
     | 
| 
       30 
     | 
    
         
            -
                  protected
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
                  # Note on HTTP Throttling
         
     | 
| 
       33 
     | 
    
         
            -
                  #
         
     | 
| 
       34 
     | 
    
         
            -
                  # It might make sense to throttle HTTP calls in
         
     | 
| 
       35 
     | 
    
         
            -
                  # * pull_organizations
         
     | 
| 
       36 
     | 
    
         
            -
                  # * pull_sources
         
     | 
| 
       37 
     | 
    
         
            -
                  #
         
     | 
| 
       38 
     | 
    
         
            -
                  # However, doing a simple sleep(TIME_DELAY) is too blunt.
         
     | 
| 
       39 
     | 
    
         
            -
                  # It makes sense when an HTTP call is made; however, it does
         
     | 
| 
       40 
     | 
    
         
            -
                  # not make sense when the importer uses a local cache.
         
     | 
| 
       41 
     | 
    
         
            -
                  #
         
     | 
| 
       42 
     | 
    
         
            -
                  # An alternative is to wrap HTTP calls in this Importer library.
         
     | 
| 
       43 
     | 
    
         
            -
                  # It could add a little bit of delay to HTTP calls that are made
         
     | 
| 
       44 
     | 
    
         
            -
                  # too rapidly.
         
     | 
| 
       45 
     | 
    
         
            -
                  #
         
     | 
| 
       46 
     | 
    
         
            -
                  def pull_resource(resource)
         
     | 
| 
       47 
     | 
    
         
            -
                    unless importer_class = @options[:pullers][resource]
         
     | 
| 
       48 
     | 
    
         
            -
                      raise Error, "options[:pullers][:#{resource}] is required"
         
     | 
| 
       49 
     | 
    
         
            -
                    end
         
     | 
| 
       50 
     | 
    
         
            -
                    importer = importer_class.new
         
     | 
| 
       51 
     | 
    
         
            -
                    FileUtils.mkdir_p(folder(resource))
         
     | 
| 
       52 
     | 
    
         
            -
                    while (data = importer.fetch) do
         
     | 
| 
       53 
     | 
    
         
            -
                      write_data(resource, data)
         
     | 
| 
       54 
     | 
    
         
            -
                    end
         
     | 
| 
       55 
     | 
    
         
            -
                  end
         
     | 
| 
       56 
     | 
    
         
            -
                  
         
     | 
| 
       57 
     | 
    
         
            -
                  def write_data(resource, data)
         
     | 
| 
       58 
     | 
    
         
            -
                    file = folder(resource) + ("/%08i.yml" % @counter[resource])
         
     | 
| 
       59 
     | 
    
         
            -
                    Utility.write_yaml(file, data)
         
     | 
| 
       60 
     | 
    
         
            -
                    @counter[resource] += 1
         
     | 
| 
       61 
     | 
    
         
            -
                  end
         
     | 
| 
       62 
21 
     | 
    
         | 
| 
       63 
22 
     | 
    
         
             
                end
         
     | 
| 
       64 
23 
     | 
    
         
             
              end
         
     | 
    
        data/lib/shared.rb
    CHANGED
    
    | 
         @@ -1,6 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module DataCatalog
         
     | 
| 
       2 
2 
     | 
    
         
             
              module ImporterFramework
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Shared
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
       4 
5 
     | 
    
         
             
                  def folder(resource)
         
     | 
| 
       5 
6 
     | 
    
         
             
                    unless @options
         
     | 
| 
       6 
7 
     | 
    
         
             
                      raise Error, "@options is undefined"
         
     | 
| 
         @@ -10,6 +11,7 @@ module DataCatalog 
     | 
|
| 
       10 
11 
     | 
    
         
             
                    end
         
     | 
| 
       11 
12 
     | 
    
         
             
                    File.join(@options[:cache_folder], resource.to_s)
         
     | 
| 
       12 
13 
     | 
    
         
             
                  end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
       13 
15 
     | 
    
         
             
                end
         
     | 
| 
       14 
16 
     | 
    
         
             
              end
         
     | 
| 
       15 
17 
     | 
    
         
             
            end
         
     | 
    
        data/lib/tasks.rb
    CHANGED
    
    
    
        data/lib/utility.rb
    CHANGED
    
    
    
        data/spec/spec_helper.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,12 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: datacatalog-importer
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 23
         
     | 
| 
       4 
5 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       5 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       6 
7 
     | 
    
         
             
              - 0
         
     | 
| 
       7 
     | 
    
         
            -
              - 1
     | 
| 
       8 
     | 
    
         
            -
              - 19
     | 
| 
       9 
     | 
    
         
            -
              version: 0.1.19
     | 
| 
      
 8 
     | 
    
         
            +
              - 2
         
     | 
| 
      
 9 
     | 
    
         
            +
              - 0
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.2.0
         
     | 
| 
       10 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       11 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       12 
13 
     | 
    
         
             
            - David James
         
     | 
| 
         @@ -14,44 +15,50 @@ autorequire: 
     | 
|
| 
       14 
15 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       15 
16 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       16 
17 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
            date: 2010- 
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2010-07-08 00:00:00 -04:00
         
     | 
| 
       18 
19 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       19 
20 
     | 
    
         
             
            dependencies: 
         
     | 
| 
       20 
21 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       21 
22 
     | 
    
         
             
              name: nokogiri
         
     | 
| 
       22 
23 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       23 
24 
     | 
    
         
             
              requirement: &id001 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 25 
     | 
    
         
            +
                none: false
         
     | 
| 
       24 
26 
     | 
    
         
             
                requirements: 
         
     | 
| 
       25 
27 
     | 
    
         
             
                - - ">="
         
     | 
| 
       26 
28 
     | 
    
         
             
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 29 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
       27 
30 
     | 
    
         
             
                    segments: 
         
     | 
| 
       28 
31 
     | 
    
         
             
                    - 1
         
     | 
| 
       29 
32 
     | 
    
         
             
                    - 4
         
     | 
| 
       30 
     | 
    
         
            -
                    -  
     | 
| 
       31 
     | 
    
         
            -
                    version: 1.4. 
     | 
| 
      
 33 
     | 
    
         
            +
                    - 2
         
     | 
| 
      
 34 
     | 
    
         
            +
                    version: 1.4.2
         
     | 
| 
       32 
35 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       33 
36 
     | 
    
         
             
              version_requirements: *id001
         
     | 
| 
       34 
37 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       35 
38 
     | 
    
         
             
              name: datacatalog
         
     | 
| 
       36 
39 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       37 
40 
     | 
    
         
             
              requirement: &id002 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 41 
     | 
    
         
            +
                none: false
         
     | 
| 
       38 
42 
     | 
    
         
             
                requirements: 
         
     | 
| 
       39 
43 
     | 
    
         
             
                - - ">="
         
     | 
| 
       40 
44 
     | 
    
         
             
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 45 
     | 
    
         
            +
                    hash: 17
         
     | 
| 
       41 
46 
     | 
    
         
             
                    segments: 
         
     | 
| 
       42 
47 
     | 
    
         
             
                    - 0
         
     | 
| 
       43 
48 
     | 
    
         
             
                    - 4
         
     | 
| 
       44 
     | 
    
         
            -
                    -  
     | 
| 
       45 
     | 
    
         
            -
                    version: 0.4. 
     | 
| 
      
 49 
     | 
    
         
            +
                    - 15
         
     | 
| 
      
 50 
     | 
    
         
            +
                    version: 0.4.15
         
     | 
| 
       46 
51 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       47 
52 
     | 
    
         
             
              version_requirements: *id002
         
     | 
| 
       48 
53 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
       49 
54 
     | 
    
         
             
              name: rspec
         
     | 
| 
       50 
55 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       51 
56 
     | 
    
         
             
              requirement: &id003 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 57 
     | 
    
         
            +
                none: false
         
     | 
| 
       52 
58 
     | 
    
         
             
                requirements: 
         
     | 
| 
       53 
59 
     | 
    
         
             
                - - ">="
         
     | 
| 
       54 
60 
     | 
    
         
             
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 61 
     | 
    
         
            +
                    hash: 13
         
     | 
| 
       55 
62 
     | 
    
         
             
                    segments: 
         
     | 
| 
       56 
63 
     | 
    
         
             
                    - 1
         
     | 
| 
       57 
64 
     | 
    
         
             
                    - 2
         
     | 
| 
         @@ -76,7 +83,12 @@ files: 
     | 
|
| 
       76 
83 
     | 
    
         
             
            - Rakefile
         
     | 
| 
       77 
84 
     | 
    
         
             
            - VERSION
         
     | 
| 
       78 
85 
     | 
    
         
             
            - datacatalog-importer.gemspec
         
     | 
| 
      
 86 
     | 
    
         
            +
            - example/README.md
         
     | 
| 
      
 87 
     | 
    
         
            +
            - example/config.example.yml
         
     | 
| 
      
 88 
     | 
    
         
            +
            - example/lib/puller.rb
         
     | 
| 
      
 89 
     | 
    
         
            +
            - example/rakefile.rb
         
     | 
| 
       79 
90 
     | 
    
         
             
            - lib/datacatalog-importer.rb
         
     | 
| 
      
 91 
     | 
    
         
            +
            - lib/handler.rb
         
     | 
| 
       80 
92 
     | 
    
         
             
            - lib/importer.rb
         
     | 
| 
       81 
93 
     | 
    
         
             
            - lib/puller.rb
         
     | 
| 
       82 
94 
     | 
    
         
             
            - lib/pusher.rb
         
     | 
| 
         @@ -88,7 +100,7 @@ files: 
     | 
|
| 
       88 
100 
     | 
    
         
             
            - spec/spec_helper.rb
         
     | 
| 
       89 
101 
     | 
    
         
             
            - spec/utility_spec.rb
         
     | 
| 
       90 
102 
     | 
    
         
             
            has_rdoc: true
         
     | 
| 
       91 
     | 
    
         
            -
            homepage: http://github.com/ 
     | 
| 
      
 103 
     | 
    
         
            +
            homepage: http://github.com/sunlightlabs/datacatalog-importer
         
     | 
| 
       92 
104 
     | 
    
         
             
            licenses: []
         
     | 
| 
       93 
105 
     | 
    
         | 
| 
       94 
106 
     | 
    
         
             
            post_install_message: 
         
     | 
| 
         @@ -97,23 +109,27 @@ rdoc_options: 
     | 
|
| 
       97 
109 
     | 
    
         
             
            require_paths: 
         
     | 
| 
       98 
110 
     | 
    
         
             
            - lib
         
     | 
| 
       99 
111 
     | 
    
         
             
            required_ruby_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 112 
     | 
    
         
            +
              none: false
         
     | 
| 
       100 
113 
     | 
    
         
             
              requirements: 
         
     | 
| 
       101 
114 
     | 
    
         
             
              - - ">="
         
     | 
| 
       102 
115 
     | 
    
         
             
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 116 
     | 
    
         
            +
                  hash: 3
         
     | 
| 
       103 
117 
     | 
    
         
             
                  segments: 
         
     | 
| 
       104 
118 
     | 
    
         
             
                  - 0
         
     | 
| 
       105 
119 
     | 
    
         
             
                  version: "0"
         
     | 
| 
       106 
120 
     | 
    
         
             
            required_rubygems_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 121 
     | 
    
         
            +
              none: false
         
     | 
| 
       107 
122 
     | 
    
         
             
              requirements: 
         
     | 
| 
       108 
123 
     | 
    
         
             
              - - ">="
         
     | 
| 
       109 
124 
     | 
    
         
             
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 125 
     | 
    
         
            +
                  hash: 3
         
     | 
| 
       110 
126 
     | 
    
         
             
                  segments: 
         
     | 
| 
       111 
127 
     | 
    
         
             
                  - 0
         
     | 
| 
       112 
128 
     | 
    
         
             
                  version: "0"
         
     | 
| 
       113 
129 
     | 
    
         
             
            requirements: []
         
     | 
| 
       114 
130 
     | 
    
         | 
| 
       115 
131 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       116 
     | 
    
         
            -
            rubygems_version: 1.3. 
     | 
| 
      
 132 
     | 
    
         
            +
            rubygems_version: 1.3.7
         
     | 
| 
       117 
133 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       118 
134 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       119 
135 
     | 
    
         
             
            summary: A framework to write National Data Catalog importers
         
     |