datacatalog-importer 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{datacatalog-importer}
8
- s.version = "0.2.3"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David James"]
data/lib/utility.rb CHANGED
@@ -57,7 +57,7 @@ module DataCatalog
57
57
 
58
58
  def self.headers
59
59
  {
60
- "UserAgent" => "National Data Catalog Importer/0.2.3",
60
+ "UserAgent" => "National Data Catalog Importer/0.3.0",
61
61
  }
62
62
  end
63
63
 
@@ -101,99 +101,46 @@ module DataCatalog
101
101
  puts "Elapsed time [#{label}] %.2f s" % diff
102
102
  result
103
103
  end
104
-
105
- # == CSV ==
106
-
107
- # { :headers => true } is a common option
108
- def self.parse_csv_from_file(file, options={})
109
- extra_header_rows = options.delete(:extra_header_rows) || 0
110
- File.open(file) do |f|
111
- extra_header_rows.times { f.gets } # ignore these rows
112
- FasterCSV.parse(f, options)
113
- end
114
- end
115
-
116
- def self.parse_csv_from_uri(uri, options={})
117
- data = fetch(uri, options)
118
- FasterCSV.parse(data, options)
119
- end
120
-
121
- def self.parse_csv_from_file_or_uri(file, uri, options={})
122
- force_fetch = options.delete(:force_fetch) || false
123
- if force_fetch || !File.exist?(file)
124
- data = fetch(uri, options)
125
- File.open(file, "w") { |f| f.write(data) }
126
- else
127
- remove_fetch_options(options)
128
- end
129
- # Why always parse the file? See Note 001, below.
130
- parse_csv_from_file(file, options)
131
- end
132
-
133
- # == HTML ==
134
-
135
- def self.parse_html_from_file(file)
136
- File.open(file) do |f|
137
- Nokogiri::HTML::Document.parse(f)
138
- end
139
- end
140
-
141
- def self.parse_html_from_uri(uri, options={})
142
- data = fetch(uri, options)
143
- Nokogiri::HTML::Document.parse(data)
144
- end
145
-
146
- def self.parse_html_from_file_or_uri(file, uri, options={})
147
- force_fetch = options.delete(:force_fetch) || false
148
- if force_fetch || !File.exist?(file)
149
- data = fetch(uri, options)
150
- File.open(file, "w") { |f| f.write(data) }
151
- else
152
- remove_fetch_options(options)
153
- end
154
- # Why always parse the file? See Note 001, below.
155
- parse_html_from_file(file)
156
- end
157
-
158
- # == JSON
159
-
160
- def self.parse_json_from_file(file)
104
+
105
+ # == Parsing ===
106
+
107
+ def self.parse_file(format, file, options={})
161
108
  File.open(file) do |f|
162
- JSON.parse(f.read)
109
+ case format
110
+ when :csv
111
+ extra_header_rows = options.delete(:extra_header_rows) || 0
112
+ extra_header_rows.times { f.gets } # ignore these rows
113
+ FasterCSV.parse(f, options)
114
+ when :xml
115
+ Nokogiri::XML::Document.parse(f)
116
+ when :json
117
+ JSON.parse(f.read)
118
+ when :html
119
+ Nokogiri::HTML::Document.parse(f)
120
+ else
121
+ raise "Unexpected format : #{format.inspect}"
122
+ end
163
123
  end
164
124
  end
165
-
166
- def self.parse_json_from_uri(uri, options={})
125
+
126
+ def self.parse_uri(format, uri, options={})
167
127
  data = fetch(uri, options)
168
- JSON.parse(data)
169
- end
170
-
171
- def self.parse_json_from_file_or_uri(file, uri, options={})
172
- force_fetch = options.delete(:force_fetch) || false
173
- if force_fetch || !File.exist?(file)
174
- data = fetch(uri, options)
175
- File.open(file, "w") { |f| f.write(data) }
128
+ case format
129
+ when :csv
130
+ # TODO: support extra_header_rows option
131
+ FasterCSV.parse(data, options)
132
+ when :xml
133
+ Nokogiri::XML::Document.parse(data)
134
+ when :json
135
+ JSON.parse(data)
136
+ when :html
137
+ Nokogiri::HTML::Document.parse(data)
176
138
  else
177
- remove_fetch_options(options)
139
+ raise "Unexpected format : #{format.inspect}"
178
140
  end
179
- # Why always parse the file? See Note 001, below.
180
- parse_json_from_file(file)
181
141
  end
182
-
183
- # == XML
184
-
185
- def self.parse_xml_from_file(file)
186
- File.open(file) do |f|
187
- Nokogiri::XML::Document.parse(f)
188
- end
189
- end
190
-
191
- def self.parse_xml_from_uri(uri, options={})
192
- data = fetch(uri, options)
193
- Nokogiri::XML::Document.parse(data)
194
- end
195
-
196
- def self.parse_xml_from_file_or_uri(file, uri, options={})
142
+
143
+ def self.parse_file_or_uri(format, file, uri, options={})
197
144
  force_fetch = options.delete(:force_fetch) || false
198
145
  if force_fetch || !File.exist?(file)
199
146
  data = fetch(uri, options)
@@ -202,7 +149,7 @@ module DataCatalog
202
149
  remove_fetch_options(options)
203
150
  end
204
151
  # Why always parse the file? See Note 001, below.
205
- parse_xml_from_file(file)
152
+ parse_file(format, file, options)
206
153
  end
207
154
 
208
155
  # == YAML
data/spec/utility_spec.rb CHANGED
@@ -106,28 +106,28 @@ describe "Utility" do
106
106
  end
107
107
 
108
108
  describe "csv" do
109
- describe "parse_csv_from_file" do
109
+ describe "parse_file" do
110
110
  it "should work" do
111
111
  file = File.dirname(__FILE__) + '/test.csv'
112
- result = U.parse_csv_from_file(file)
112
+ result = U.parse_file(:csv, file)
113
113
  result.should == [["Metro Center", "Dupont Circle"]]
114
114
  end
115
115
  end
116
116
 
117
- describe "parse_csv_from_uri" do
117
+ describe "parse_uri" do
118
118
  it "should work" do
119
119
  readable = Object.new
120
120
  readable.stub(:read).and_return(%(Metro Center,Dupont Circle))
121
121
  U.stub(:open).and_return(readable)
122
- result = U.parse_csv_from_uri("fake", :quiet => true)
122
+ result = U.parse_uri(:csv, "fake", :quiet => true)
123
123
  result.should == [["Metro Center", "Dupont Circle"]]
124
124
  end
125
125
  end
126
126
 
127
- describe "parse_csv_from_file_or_uri" do
127
+ describe "parse_file_or_uri" do
128
128
  it "should work when file present" do
129
129
  file = File.dirname(__FILE__) + '/test.csv'
130
- result = U.parse_csv_from_file_or_uri(file, "fake", :quiet => true)
130
+ result = U.parse_file_or_uri(:csv, file, "fake", :quiet => true)
131
131
  result.should == [["Metro Center", "Dupont Circle"]]
132
132
  end
133
133
 
@@ -137,7 +137,7 @@ describe "Utility" do
137
137
  U.stub(:open).and_return(readable)
138
138
  file = File.dirname(__FILE__) + "/missing.csv"
139
139
  FileUtils.rm(file) if File.exists?(file)
140
- result = U.parse_csv_from_file_or_uri(file, "fake", :quiet => true)
140
+ result = U.parse_file_or_uri(:csv, file, "fake", :quiet => true)
141
141
  FileUtils.rm(file) if File.exists?(file)
142
142
  result.should == [["Metro Center", "Dupont Circle"]]
143
143
  end
@@ -145,16 +145,16 @@ describe "Utility" do
145
145
  end
146
146
 
147
147
  describe "html" do
148
- describe "parse_html_from_file" do
148
+ describe "parse_file" do
149
149
  it "should work" do
150
150
  file = File.dirname(__FILE__) + '/test.html'
151
- parsed = U.parse_html_from_file(file)
151
+ parsed = U.parse_file(:html, file)
152
152
  result = parsed.css('li').map(&:content)
153
153
  result.should == ["Metro Center", "Dupont Circle"]
154
154
  end
155
155
  end
156
156
 
157
- describe "parse_html_from_uri" do
157
+ describe "parse_uri" do
158
158
  it "should work" do
159
159
  readable = Object.new
160
160
  readable.stub(:read).and_return(%(
@@ -168,16 +168,16 @@ describe "Utility" do
168
168
  </html>
169
169
  ))
170
170
  U.stub(:open).and_return(readable)
171
- parsed = U.parse_html_from_uri("fake", :quiet => true)
171
+ parsed = U.parse_uri(:html, "fake", :quiet => true)
172
172
  result = parsed.css('li').map(&:content)
173
173
  result.should == ["Metro Center", "Dupont Circle"]
174
174
  end
175
175
  end
176
176
 
177
- describe "parse_html_from_file_or_uri" do
177
+ describe "parse_file_or_uri" do
178
178
  it "should work when file present" do
179
179
  file = File.dirname(__FILE__) + '/test.html'
180
- parsed = U.parse_html_from_file_or_uri(file, "fake", :quiet => true)
180
+ parsed = U.parse_file_or_uri(:html, file, "fake", :quiet => true)
181
181
  result = parsed.css('li').map(&:content)
182
182
  result.should == ["Metro Center", "Dupont Circle"]
183
183
  end
@@ -193,7 +193,7 @@ describe "Utility" do
193
193
  U.stub(:open).and_return(readable)
194
194
  file = File.dirname(__FILE__) + "/missing.html"
195
195
  FileUtils.rm(file) if File.exists?(file)
196
- parsed = U.parse_html_from_file_or_uri(file, "fake", :quiet => true)
196
+ parsed = U.parse_file_or_uri(:html, file, "fake", :quiet => true)
197
197
  FileUtils.rm(file) if File.exists?(file)
198
198
  result = parsed.xpath('.//stations/station').map(&:content)
199
199
  result.should == ["Metro Center", "Dupont Circle"]
@@ -202,29 +202,29 @@ describe "Utility" do
202
202
  end
203
203
 
204
204
  describe "json" do
205
- describe "parse_json_from_file" do
205
+ describe "parse_file" do
206
206
  it "should work" do
207
207
  file = File.dirname(__FILE__) + '/test.json'
208
- U.parse_json_from_file(file).should ==
208
+ U.parse_file(:json, file).should ==
209
209
  { "stations" => ["Metro Center", "Dupont Circle"] }
210
210
  end
211
211
  end
212
212
 
213
- describe "parse_json_from_uri" do
213
+ describe "parse_uri" do
214
214
  it "should work" do
215
215
  readable = Object.new
216
216
  readable.stub(:read).and_return(
217
217
  %({"stations":["Metro Center","Dupont Circle"]}))
218
218
  U.stub(:open).and_return(readable)
219
- result = U.parse_json_from_uri("fake", :quiet => true)
219
+ result = U.parse_uri(:json, "fake", :quiet => true)
220
220
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
221
221
  end
222
222
  end
223
223
 
224
- describe "parse_json_from_file_or_uri" do
224
+ describe "parse_file_or_uri" do
225
225
  it "should work when file present" do
226
226
  file = File.dirname(__FILE__) + '/test.json'
227
- result = U.parse_json_from_file_or_uri(file, "fake", :quiet => true)
227
+ result = U.parse_file_or_uri(:json, file, "fake", :quiet => true)
228
228
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
229
229
  end
230
230
 
@@ -235,7 +235,7 @@ describe "Utility" do
235
235
  U.stub(:open).and_return(readable)
236
236
  file = File.dirname(__FILE__) + "/missing.json"
237
237
  FileUtils.rm(file) if File.exists?(file)
238
- result = U.parse_json_from_file_or_uri(file, "fake", :quiet => true)
238
+ result = U.parse_file_or_uri(:json, file, "fake", :quiet => true)
239
239
  FileUtils.rm(file) if File.exists?(file)
240
240
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
241
241
  end
@@ -243,16 +243,16 @@ describe "Utility" do
243
243
  end
244
244
 
245
245
  describe "xml" do
246
- describe "parse_xml_from_file" do
246
+ describe "parse_file" do
247
247
  it "should work" do
248
248
  file = File.dirname(__FILE__) + '/test.xml'
249
- parsed = U.parse_xml_from_file(file)
249
+ parsed = U.parse_file(:xml, file)
250
250
  result = parsed.xpath('.//stations/station').map(&:content)
251
251
  result.should == ["Metro Center", "Dupont Circle"]
252
252
  end
253
253
  end
254
254
 
255
- describe "parse_xml_from_uri" do
255
+ describe "parse_uri" do
256
256
  it "should work" do
257
257
  readable = Object.new
258
258
  readable.stub(:read).and_return(%(
@@ -262,16 +262,16 @@ describe "Utility" do
262
262
  </stations>
263
263
  ))
264
264
  U.stub(:open).and_return(readable)
265
- parsed = U.parse_xml_from_uri("fake", :quiet => true)
265
+ parsed = U.parse_uri(:xml, "fake", :quiet => true)
266
266
  result = parsed.xpath('.//stations/station').map(&:content)
267
267
  result.should == ["Metro Center", "Dupont Circle"]
268
268
  end
269
269
  end
270
270
 
271
- describe "parse_xml_from_file_or_uri" do
271
+ describe "parse_file_or_uri" do
272
272
  it "should work when file present" do
273
273
  file = File.dirname(__FILE__) + '/test.xml'
274
- parsed = U.parse_xml_from_file_or_uri(file, "fake", :quiet => true)
274
+ parsed = U.parse_file_or_uri(:xml, file, "fake", :quiet => true)
275
275
  result = parsed.xpath('.//stations/station').map(&:content)
276
276
  result.should == ["Metro Center", "Dupont Circle"]
277
277
  end
@@ -287,7 +287,7 @@ describe "Utility" do
287
287
  U.stub(:open).and_return(readable)
288
288
  file = File.dirname(__FILE__) + "/missing.xml"
289
289
  FileUtils.rm(file) if File.exists?(file)
290
- parsed = U.parse_xml_from_file_or_uri(file, "fake", :quiet => true)
290
+ parsed = U.parse_file_or_uri(:xml, file, "fake", :quiet => true)
291
291
  FileUtils.rm(file) if File.exists?(file)
292
292
  result = parsed.xpath('.//stations/station').map(&:content)
293
293
  result.should == ["Metro Center", "Dupont Circle"]
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacatalog-importer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 2
9
8
  - 3
10
- version: 0.2.3
9
+ - 0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - David James