datacatalog-importer 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +2 -2
- data/lib/puller.rb +1 -1
- data/lib/utility.rb +30 -6
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.1
|
1
|
+
0.1.2
|
data/datacatalog-importer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{datacatalog-importer}
|
8
|
-
s.version = "0.1.1"
|
8
|
+
s.version = "0.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David James"]
|
12
|
-
s.date = %q{2010-02
|
12
|
+
s.date = %q{2010-03-02}
|
13
13
|
s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
|
14
14
|
s.email = %q{djames@sunlightfoundation.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/puller.rb
CHANGED
@@ -45,7 +45,7 @@ module DataCatalog
|
|
45
45
|
#
|
46
46
|
def pull_resource(resource)
|
47
47
|
unless importer_class = @options[:pullers][resource]
|
48
|
-
raise Error, "options[:pullers][:#{
|
48
|
+
raise Error, "options[:pullers][:#{resource}] is required"
|
49
49
|
end
|
50
50
|
importer = importer_class.new
|
51
51
|
FileUtils.mkdir_p(folder(resource))
|
data/lib/utility.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'fastercsv'
|
1
2
|
require 'nokogiri'
|
2
3
|
require 'open-uri'
|
3
4
|
|
@@ -30,28 +31,51 @@ module DataCatalog
|
|
30
31
|
"UserAgent" => "National Data Catalog Importer/0.1.0",
|
31
32
|
}
|
32
33
|
end
|
34
|
+
|
35
|
+
def self.parse_csv_from_file(filename, extra_header_rows=0)
|
36
|
+
File.open(filename) do |f|
|
37
|
+
extra_header_rows.times { f.gets } # ignore these rows
|
38
|
+
FasterCSV.parse(f)
|
39
|
+
end
|
40
|
+
end
|
33
41
|
|
34
|
-
def self.
|
42
|
+
def self.parse_html_from_file(filename)
|
35
43
|
File.open(filename) do |f|
|
36
44
|
Nokogiri::HTML::Document.parse(f)
|
37
45
|
end
|
38
46
|
end
|
39
47
|
|
40
|
-
def self.
|
48
|
+
def self.parse_csv_from_file_or_uri(uri, file, options={})
|
41
49
|
if options[:force_fetch] || !File.exist?(file)
|
42
|
-
document =
|
50
|
+
document = parse_csv_from_uri(uri)
|
43
51
|
File.open(file, "w") { |f| f.write(document) }
|
44
52
|
end
|
45
|
-
|
53
|
+
parse_csv_from_file(file, options[:extra_header_rows] || 0)
|
54
|
+
# Why always parse the file? Consistency with parse_html_from_file_or_uri.
|
46
55
|
end
|
47
|
-
|
48
|
-
def self.
|
56
|
+
|
57
|
+
def self.parse_html_from_file_or_uri(uri, file, options={})
|
58
|
+
if options[:force_fetch] || !File.exist?(file)
|
59
|
+
document = parse_html_from_uri(uri)
|
60
|
+
File.open(file, "w") { |f| f.write(document) }
|
61
|
+
end
|
62
|
+
parse_html_from_file(file) # Why always parse the file? See Note 001, below.
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.parse_csv_from_uri(uri)
|
49
66
|
puts "Fetching #{uri}..."
|
50
67
|
open(uri, headers) do |io|
|
51
68
|
Nokogiri::HTML::Document.parse(io)
|
52
69
|
end
|
53
70
|
end
|
54
71
|
|
72
|
+
def self.parse_html_from_uri(uri)
|
73
|
+
puts "Fetching #{uri}..."
|
74
|
+
open(uri, headers) do |io|
|
75
|
+
FasterCSV.parse(io)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
55
79
|
# ActiveSupport 2.3.5 adds @_rails_html_safe aggressively.
|
56
80
|
# This method removes it so you can output clean YAML.
|
57
81
|
def self.plain_string(s)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacatalog-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David James
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02
|
12
|
+
date: 2010-03-02 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|