datacatalog-importer 0.1.15 → 0.1.16
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +1 -1
- data/lib/utility.rb +25 -29
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.15
|
1
|
+
0.1.16
|
data/lib/utility.rb
CHANGED
@@ -5,13 +5,13 @@ require 'open-uri'
|
|
5
5
|
module DataCatalog
|
6
6
|
module ImporterFramework
|
7
7
|
class Utility
|
8
|
-
|
8
|
+
|
9
9
|
# == URLs ==
|
10
|
-
|
10
|
+
|
11
11
|
def self.absolute_url(page_url, url)
|
12
12
|
Utility.plain_string(URI.parse(page_url).merge(url).to_s)
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
# == Cleaning ==
|
16
16
|
|
17
17
|
def self.single_line_clean(s)
|
@@ -32,9 +32,9 @@ module DataCatalog
|
|
32
32
|
end
|
33
33
|
s
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
# == API ===
|
37
|
-
|
37
|
+
|
38
38
|
def self.setup_api(api_key, base_uri)
|
39
39
|
DataCatalog.api_key = api_key
|
40
40
|
DataCatalog.base_uri = base_uri
|
@@ -42,10 +42,10 @@ module DataCatalog
|
|
42
42
|
|
43
43
|
def self.headers
|
44
44
|
{
|
45
|
-
"UserAgent" => "National Data Catalog Importer/0.1.15",
|
45
|
+
"UserAgent" => "National Data Catalog Importer/0.1.16",
|
46
46
|
}
|
47
47
|
end
|
48
|
-
|
48
|
+
|
49
49
|
# == Various ==
|
50
50
|
|
51
51
|
# For background on rescuing net/http errors, see:
|
@@ -66,7 +66,7 @@ module DataCatalog
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
end
|
69
|
-
|
69
|
+
|
70
70
|
def self.report_timing(label)
|
71
71
|
puts "Starting: [#{label}]"
|
72
72
|
t0 = Time.now
|
@@ -76,7 +76,7 @@ module DataCatalog
|
|
76
76
|
puts "Elapsed time [#{label}] %.2f s" % diff
|
77
77
|
result
|
78
78
|
end
|
79
|
-
|
79
|
+
|
80
80
|
# == CSV ==
|
81
81
|
|
82
82
|
# { :headers => true } is a common option
|
@@ -89,9 +89,7 @@ module DataCatalog
|
|
89
89
|
end
|
90
90
|
|
91
91
|
def self.parse_csv_from_uri(uri, options={})
|
92
|
-
|
93
|
-
data = open(uri, headers)
|
94
|
-
puts data.inspect
|
92
|
+
data = fetch(uri)
|
95
93
|
FasterCSV.parse(data, options)
|
96
94
|
end
|
97
95
|
|
@@ -103,7 +101,7 @@ module DataCatalog
|
|
103
101
|
end
|
104
102
|
parse_csv_from_file(file, options)
|
105
103
|
end
|
106
|
-
|
104
|
+
|
107
105
|
# == HTML ==
|
108
106
|
|
109
107
|
def self.parse_html_from_file(filename)
|
@@ -113,10 +111,8 @@ module DataCatalog
|
|
113
111
|
end
|
114
112
|
|
115
113
|
def self.parse_html_from_uri(uri)
|
116
|
-
|
117
|
-
|
118
|
-
Nokogiri::HTML::Document.parse(io)
|
119
|
-
end
|
114
|
+
data = fetch(uri)
|
115
|
+
Nokogiri::HTML::Document.parse(data)
|
120
116
|
end
|
121
117
|
|
122
118
|
def self.parse_html_from_file_or_uri(uri, file, options={})
|
@@ -126,7 +122,7 @@ module DataCatalog
|
|
126
122
|
end
|
127
123
|
parse_html_from_file(file) # Why always parse the file? See Note 001, below.
|
128
124
|
end
|
129
|
-
|
125
|
+
|
130
126
|
# == XML
|
131
127
|
|
132
128
|
def self.parse_xml_from_file(filename)
|
@@ -136,22 +132,22 @@ module DataCatalog
|
|
136
132
|
end
|
137
133
|
|
138
134
|
def self.parse_xml_from_uri(uri)
|
139
|
-
|
140
|
-
|
135
|
+
puts "Fetching #{uri}..."
|
136
|
+
Nokogiri::XML(open(uri))
|
141
137
|
end
|
142
|
-
|
138
|
+
|
143
139
|
def self.parse_xml_from_file_or_uri(uri, file, options={})
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
140
|
+
if options[:force_fetch] || !File.exist?(file)
|
141
|
+
document = parse_xml_from_uri(uri)
|
142
|
+
File.open(file, "w") { |f| f.write(document) }
|
143
|
+
end
|
144
|
+
parse_xml_from_file(file) # Why always parse the file? See Note 001, below.
|
149
145
|
end
|
150
|
-
|
146
|
+
|
151
147
|
# == YAML
|
152
|
-
|
148
|
+
|
153
149
|
# To load YAML use: YAML::load_file(filename)
|
154
|
-
|
150
|
+
|
155
151
|
def self.write_yaml(filename, contents)
|
156
152
|
File.open(filename, "w") do |f|
|
157
153
|
YAML::dump(contents, f)
|