datacatalog-importer 0.1.19 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -17,5 +17,7 @@ tmtags
17
17
  coverage
18
18
  rdoc
19
19
  pkg
20
+ example/cache
21
+ example/config.yml
20
22
 
21
23
  ## PROJECT::SPECIFIC
data/Rakefile CHANGED
@@ -8,10 +8,10 @@ begin
8
8
  gem.summary = %Q{A framework to write National Data Catalog importers}
9
9
  gem.description = %Q{This framework makes it easier to write importers for the National Data Catalog.}
10
10
  gem.email = "djames@sunlightfoundation.com"
11
- gem.homepage = "http://github.com/djsun/datacatalog-importer"
11
+ gem.homepage = "http://github.com/sunlightlabs/datacatalog-importer"
12
12
  gem.authors = ["David James"]
13
- gem.add_dependency "nokogiri", ">= 1.4.1"
14
- gem.add_dependency "datacatalog", ">= 0.4.14"
13
+ gem.add_dependency "nokogiri", ">= 1.4.2"
14
+ gem.add_dependency "datacatalog", ">= 0.4.15"
15
15
  gem.add_development_dependency "rspec", ">= 1.2.9"
16
16
  # gem is a Gem::Specification...
17
17
  # see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.19
1
+ 0.2.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{datacatalog-importer}
8
- s.version = "0.1.19"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David James"]
12
- s.date = %q{2010-05-12}
12
+ s.date = %q{2010-07-08}
13
13
  s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
14
14
  s.email = %q{djames@sunlightfoundation.com}
15
15
  s.extra_rdoc_files = [
@@ -24,7 +24,12 @@ Gem::Specification.new do |s|
24
24
  "Rakefile",
25
25
  "VERSION",
26
26
  "datacatalog-importer.gemspec",
27
+ "example/README.md",
28
+ "example/config.example.yml",
29
+ "example/lib/puller.rb",
30
+ "example/rakefile.rb",
27
31
  "lib/datacatalog-importer.rb",
32
+ "lib/handler.rb",
28
33
  "lib/importer.rb",
29
34
  "lib/puller.rb",
30
35
  "lib/pusher.rb",
@@ -36,10 +41,10 @@ Gem::Specification.new do |s|
36
41
  "spec/spec_helper.rb",
37
42
  "spec/utility_spec.rb"
38
43
  ]
39
- s.homepage = %q{http://github.com/djsun/datacatalog-importer}
44
+ s.homepage = %q{http://github.com/sunlightlabs/datacatalog-importer}
40
45
  s.rdoc_options = ["--charset=UTF-8"]
41
46
  s.require_paths = ["lib"]
42
- s.rubygems_version = %q{1.3.6}
47
+ s.rubygems_version = %q{1.3.7}
43
48
  s.summary = %q{A framework to write National Data Catalog importers}
44
49
  s.test_files = [
45
50
  "spec/spec_helper.rb",
@@ -50,18 +55,18 @@ Gem::Specification.new do |s|
50
55
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
51
56
  s.specification_version = 3
52
57
 
53
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
54
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
55
- s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.14"])
58
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
59
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.2"])
60
+ s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.15"])
56
61
  s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
57
62
  else
58
- s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
59
- s.add_dependency(%q<datacatalog>, [">= 0.4.14"])
63
+ s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
64
+ s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
60
65
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
61
66
  end
62
67
  else
63
- s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
64
- s.add_dependency(%q<datacatalog>, [">= 0.4.14"])
68
+ s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
69
+ s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
65
70
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
66
71
  end
67
72
  end
data/example/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # README
2
+
3
+ This is an example of how to write an importer that depends on the National Data Catalog importer framework.
4
+
5
+ Please note that it is a bare-bones example. It simply demonstrates how to set up your rakefile and the basic moving parts.
@@ -0,0 +1,3 @@
1
+ local:
2
+ api_key: "0000000000000000000000000000000000000000"
3
+ base_uri: "http://localhost:3000"
@@ -0,0 +1,71 @@
1
+ class Puller
2
+
3
+ ORGS = [
4
+ {
5
+ :name => "Budget Office",
6
+ :url => "http://example.gov/orgs/budget-office",
7
+ :description => "Prepares the executive budget..."
8
+ },
9
+ {
10
+ :name => "Environmental Agency",
11
+ :url => "http://example.gov/orgs/environmental-agency",
12
+ :description => "Tracks environmental compliance..."
13
+ },
14
+ {
15
+ :name => "Inspector General",
16
+ :url => "http://example.gov/orgs/inspector-general",
17
+ :description => "Inspects..."
18
+ }
19
+ ]
20
+
21
+ SOURCES = [
22
+ {
23
+ :title => "School District Performance",
24
+ :url => "http://example.gov/data-sets/209",
25
+ :description => "Comparative school performance...",
26
+ :frequency => "annual",
27
+ :source_type => "dataset",
28
+ },
29
+ {
30
+ :title => "Economic Development",
31
+ :url => "http://example.gov/data-sets/210",
32
+ :description => "Economic indicators for...",
33
+ :frequency => "monthly",
34
+ :source_type => "dataset",
35
+ },
36
+ {
37
+ :title => "Superfund Projects",
38
+ :url => "http://example.gov/apis/5",
39
+ :description => "API for environmental cleanup...",
40
+ :frequency => "monthly",
41
+ :source_type => "api",
42
+ },
43
+ ]
44
+
45
+ def initialize(handler)
46
+ @handler = handler
47
+ end
48
+
49
+ def run
50
+ common = {
51
+ :catalog_name => "Example Catalog",
52
+ :catalog_url => "http://example.gov",
53
+ }
54
+ ORGS.each do |o|
55
+ @handler.organization(
56
+ o.merge(common).merge({
57
+ :org_type => "governmental",
58
+ })
59
+ )
60
+ end
61
+ SOURCES.each do |s|
62
+ @handler.source(
63
+ s.merge(common).merge({
64
+ :license => "public domain",
65
+ :license_url => "http://example.gov/license",
66
+ })
67
+ )
68
+ end
69
+ end
70
+
71
+ end
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'yaml'
3
+ require File.dirname(__FILE__) + '/../lib/datacatalog-importer'
4
+ require File.dirname(__FILE__) + '/lib/puller'
5
+
6
+ def setup
7
+ config_file = File.dirname(__FILE__) + '/config.yml'
8
+ config = YAML.load_file(config_file)
9
+ env = ENV['IMPORTER_ENV']
10
+ raise "IMPORTER_ENV undefined" unless env
11
+ raise "IMPORTER_ENV invalid" unless config[env]
12
+ DataCatalog::ImporterFramework::Tasks.new({
13
+ :api_key => config[env]['api_key'],
14
+ :base_uri => config[env]['base_uri'],
15
+ :cache_folder => File.dirname(__FILE__) + '/cache/parsed',
16
+ :name => "Example Catalog",
17
+ :uri => "http://example.datacatalog.gov",
18
+ :puller => Puller,
19
+ })
20
+ end
21
+
22
+ setup
@@ -6,6 +6,7 @@
6
6
  # strange conflicts.
7
7
  require 'generator.rb'
8
8
 
9
+ require File.dirname(__FILE__) + '/handler'
9
10
  require File.dirname(__FILE__) + '/importer'
10
11
  require File.dirname(__FILE__) + '/puller'
11
12
  require File.dirname(__FILE__) + '/pusher'
data/lib/handler.rb ADDED
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + '/shared'
2
+
3
+ module DataCatalog
4
+ module ImporterFramework
5
+ class Handler
6
+ include Shared
7
+
8
+ def initialize(options)
9
+ @options = options
10
+ @counter = {}
11
+ [:source, :organization].each do |resource|
12
+ FileUtils.mkdir_p(folder(resource))
13
+ @counter[resource] = 1
14
+ end
15
+ end
16
+
17
+ def source(data)
18
+ write_data(:source, data)
19
+ end
20
+
21
+ def organization(data)
22
+ write_data(:organization, data)
23
+ end
24
+
25
+ def write_data(resource, data)
26
+ file = folder(resource) + ("/%08i.yml" % @counter[resource])
27
+ Utility.write_yaml(file, data)
28
+ @counter[resource] += 1
29
+ end
30
+
31
+ end
32
+ end
33
+ end
data/lib/puller.rb CHANGED
@@ -1,64 +1,23 @@
1
- require File.dirname(__FILE__) + '/shared'
2
-
3
1
  module DataCatalog
4
2
  module ImporterFramework
5
3
  class Puller
6
- include Shared
7
4
 
8
- REQUIRED = %w(cache_folder pullers)
5
+ REQUIRED = %w(cache_folder puller)
9
6
 
10
7
  def initialize(options)
11
8
  REQUIRED.each do |r|
12
9
  raise Error, "option :#{r} is required" unless options[r.intern]
13
10
  end
14
11
  @options = options
15
- @counter = {
16
- :source => 1,
17
- :organization => 1,
18
- }
19
12
  end
20
13
 
21
14
  def run
22
- Utility.report_timing "pull source" do
23
- pull_resource(:source)
24
- end
25
- Utility.report_timing "pull organization" do
26
- pull_resource(:organization)
15
+ Utility.report_timing "pull" do
16
+ handler = Handler.new(@options)
17
+ puller = @options[:puller].new(handler)
18
+ puller.run
27
19
  end
28
20
  end
29
-
30
- protected
31
-
32
- # Note on HTTP Throttling
33
- #
34
- # It might make sense to throttle HTTP calls in
35
- # * pull_organizations
36
- # * pull_sources
37
- #
38
- # However, doing a simple sleep(TIME_DELAY) is too blunt.
39
- # It makes sense when an HTTP call is made; however, it does
40
- # not make sense when the importer uses a local cache.
41
- #
42
- # An alternative is to wrap HTTP calls in this Importer library.
43
- # It could add a little bit of delay to HTTP calls that are made
44
- # too rapidly.
45
- #
46
- def pull_resource(resource)
47
- unless importer_class = @options[:pullers][resource]
48
- raise Error, "options[:pullers][:#{resource}] is required"
49
- end
50
- importer = importer_class.new
51
- FileUtils.mkdir_p(folder(resource))
52
- while (data = importer.fetch) do
53
- write_data(resource, data)
54
- end
55
- end
56
-
57
- def write_data(resource, data)
58
- file = folder(resource) + ("/%08i.yml" % @counter[resource])
59
- Utility.write_yaml(file, data)
60
- @counter[resource] += 1
61
- end
62
21
 
63
22
  end
64
23
  end
data/lib/shared.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module DataCatalog
2
2
  module ImporterFramework
3
3
  module Shared
4
+
4
5
  def folder(resource)
5
6
  unless @options
6
7
  raise Error, "@options is undefined"
@@ -10,6 +11,7 @@ module DataCatalog
10
11
  end
11
12
  File.join(@options[:cache_folder], resource.to_s)
12
13
  end
14
+
13
15
  end
14
16
  end
15
17
  end
data/lib/tasks.rb CHANGED
@@ -14,7 +14,7 @@ module DataCatalog
14
14
  puts "Pulling data from the #{options[:name]}..."
15
15
  puller = Puller.new({
16
16
  :cache_folder => options[:cache_folder],
17
- :pullers => options[:pullers],
17
+ :puller => options[:puller],
18
18
  })
19
19
  puller.run
20
20
  end
data/lib/utility.rb CHANGED
@@ -56,7 +56,7 @@ module DataCatalog
56
56
 
57
57
  def self.headers
58
58
  {
59
- "UserAgent" => "National Data Catalog Importer/0.1.19",
59
+ "UserAgent" => "National Data Catalog Importer/0.2.0",
60
60
  }
61
61
  end
62
62
 
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  $LOAD_PATH.unshift(File.dirname(__FILE__))
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'rubygems'
3
4
  require 'datacatalog-importer'
4
5
  require 'spec'
5
6
  require 'spec/autorun'
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacatalog-importer
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 23
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 1
8
- - 19
9
- version: 0.1.19
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - David James
@@ -14,44 +15,50 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-05-12 00:00:00 -04:00
18
+ date: 2010-07-08 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
21
22
  name: nokogiri
22
23
  prerelease: false
23
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
24
26
  requirements:
25
27
  - - ">="
26
28
  - !ruby/object:Gem::Version
29
+ hash: 3
27
30
  segments:
28
31
  - 1
29
32
  - 4
30
- - 1
31
- version: 1.4.1
33
+ - 2
34
+ version: 1.4.2
32
35
  type: :runtime
33
36
  version_requirements: *id001
34
37
  - !ruby/object:Gem::Dependency
35
38
  name: datacatalog
36
39
  prerelease: false
37
40
  requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
38
42
  requirements:
39
43
  - - ">="
40
44
  - !ruby/object:Gem::Version
45
+ hash: 17
41
46
  segments:
42
47
  - 0
43
48
  - 4
44
- - 14
45
- version: 0.4.14
49
+ - 15
50
+ version: 0.4.15
46
51
  type: :runtime
47
52
  version_requirements: *id002
48
53
  - !ruby/object:Gem::Dependency
49
54
  name: rspec
50
55
  prerelease: false
51
56
  requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
52
58
  requirements:
53
59
  - - ">="
54
60
  - !ruby/object:Gem::Version
61
+ hash: 13
55
62
  segments:
56
63
  - 1
57
64
  - 2
@@ -76,7 +83,12 @@ files:
76
83
  - Rakefile
77
84
  - VERSION
78
85
  - datacatalog-importer.gemspec
86
+ - example/README.md
87
+ - example/config.example.yml
88
+ - example/lib/puller.rb
89
+ - example/rakefile.rb
79
90
  - lib/datacatalog-importer.rb
91
+ - lib/handler.rb
80
92
  - lib/importer.rb
81
93
  - lib/puller.rb
82
94
  - lib/pusher.rb
@@ -88,7 +100,7 @@ files:
88
100
  - spec/spec_helper.rb
89
101
  - spec/utility_spec.rb
90
102
  has_rdoc: true
91
- homepage: http://github.com/djsun/datacatalog-importer
103
+ homepage: http://github.com/sunlightlabs/datacatalog-importer
92
104
  licenses: []
93
105
 
94
106
  post_install_message:
@@ -97,23 +109,27 @@ rdoc_options:
97
109
  require_paths:
98
110
  - lib
99
111
  required_ruby_version: !ruby/object:Gem::Requirement
112
+ none: false
100
113
  requirements:
101
114
  - - ">="
102
115
  - !ruby/object:Gem::Version
116
+ hash: 3
103
117
  segments:
104
118
  - 0
105
119
  version: "0"
106
120
  required_rubygems_version: !ruby/object:Gem::Requirement
121
+ none: false
107
122
  requirements:
108
123
  - - ">="
109
124
  - !ruby/object:Gem::Version
125
+ hash: 3
110
126
  segments:
111
127
  - 0
112
128
  version: "0"
113
129
  requirements: []
114
130
 
115
131
  rubyforge_project:
116
- rubygems_version: 1.3.6
132
+ rubygems_version: 1.3.7
117
133
  signing_key:
118
134
  specification_version: 3
119
135
  summary: A framework to write National Data Catalog importers