datacatalog-importer 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +2 -2
- data/lib/puller.rb +1 -1
- data/lib/utility.rb +30 -6
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.2
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{datacatalog-importer}
|
8
|
-
s.version = "0.1.
|
8
|
+
s.version = "0.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David James"]
|
12
|
-
s.date = %q{2010-02
|
12
|
+
s.date = %q{2010-03-02}
|
13
13
|
s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
|
14
14
|
s.email = %q{djames@sunlightfoundation.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/puller.rb
CHANGED
@@ -45,7 +45,7 @@ module DataCatalog
|
|
45
45
|
#
|
46
46
|
def pull_resource(resource)
|
47
47
|
unless importer_class = @options[:pullers][resource]
|
48
|
-
raise Error, "options[:pullers][:#{
|
48
|
+
raise Error, "options[:pullers][:#{resource}] is required"
|
49
49
|
end
|
50
50
|
importer = importer_class.new
|
51
51
|
FileUtils.mkdir_p(folder(resource))
|
data/lib/utility.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'fastercsv'
|
1
2
|
require 'nokogiri'
|
2
3
|
require 'open-uri'
|
3
4
|
|
@@ -30,28 +31,51 @@ module DataCatalog
|
|
30
31
|
"UserAgent" => "National Data Catalog Importer/0.1.0",
|
31
32
|
}
|
32
33
|
end
|
34
|
+
|
35
|
+
def self.parse_csv_from_file(filename, extra_header_rows=0)
|
36
|
+
File.open(filename) do |f|
|
37
|
+
extra_header_rows.times { f.gets } # ignore these rows
|
38
|
+
FasterCSV.parse(f)
|
39
|
+
end
|
40
|
+
end
|
33
41
|
|
34
|
-
def self.
|
42
|
+
def self.parse_html_from_file(filename)
|
35
43
|
File.open(filename) do |f|
|
36
44
|
Nokogiri::HTML::Document.parse(f)
|
37
45
|
end
|
38
46
|
end
|
39
47
|
|
40
|
-
def self.
|
48
|
+
def self.parse_csv_from_file_or_uri(uri, file, options={})
|
41
49
|
if options[:force_fetch] || !File.exist?(file)
|
42
|
-
document =
|
50
|
+
document = parse_csv_from_uri(uri)
|
43
51
|
File.open(file, "w") { |f| f.write(document) }
|
44
52
|
end
|
45
|
-
|
53
|
+
parse_csv_from_file(file, options[:extra_header_rows] || 0)
|
54
|
+
# Why always parse the file? Consistency with parse_html_from_file_or_uri.
|
46
55
|
end
|
47
|
-
|
48
|
-
def self.
|
56
|
+
|
57
|
+
def self.parse_html_from_file_or_uri(uri, file, options={})
|
58
|
+
if options[:force_fetch] || !File.exist?(file)
|
59
|
+
document = parse_html_from_uri(uri)
|
60
|
+
File.open(file, "w") { |f| f.write(document) }
|
61
|
+
end
|
62
|
+
parse_html_from_file(file) # Why always parse the file? See Note 001, below.
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.parse_csv_from_uri(uri)
|
49
66
|
puts "Fetching #{uri}..."
|
50
67
|
open(uri, headers) do |io|
|
51
68
|
Nokogiri::HTML::Document.parse(io)
|
52
69
|
end
|
53
70
|
end
|
54
71
|
|
72
|
+
def self.parse_html_from_uri(uri)
|
73
|
+
puts "Fetching #{uri}..."
|
74
|
+
open(uri, headers) do |io|
|
75
|
+
FasterCSV.parse(io)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
55
79
|
# ActiveSupport 2.3.5 adds @_rails_html_safe aggressively.
|
56
80
|
# This method removes it so you can output clean YAML.
|
57
81
|
def self.plain_string(s)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacatalog-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David James
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02
|
12
|
+
date: 2010-03-02 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|