datacatalog-importer 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{datacatalog-importer}
8
- s.version = "0.2.3"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David James"]
data/lib/utility.rb CHANGED
@@ -57,7 +57,7 @@ module DataCatalog
57
57
 
58
58
  def self.headers
59
59
  {
60
- "UserAgent" => "National Data Catalog Importer/0.2.3",
60
+ "UserAgent" => "National Data Catalog Importer/0.3.0",
61
61
  }
62
62
  end
63
63
 
@@ -101,99 +101,46 @@ module DataCatalog
101
101
  puts "Elapsed time [#{label}] %.2f s" % diff
102
102
  result
103
103
  end
104
-
105
- # == CSV ==
106
-
107
- # { :headers => true } is a common option
108
- def self.parse_csv_from_file(file, options={})
109
- extra_header_rows = options.delete(:extra_header_rows) || 0
110
- File.open(file) do |f|
111
- extra_header_rows.times { f.gets } # ignore these rows
112
- FasterCSV.parse(f, options)
113
- end
114
- end
115
-
116
- def self.parse_csv_from_uri(uri, options={})
117
- data = fetch(uri, options)
118
- FasterCSV.parse(data, options)
119
- end
120
-
121
- def self.parse_csv_from_file_or_uri(file, uri, options={})
122
- force_fetch = options.delete(:force_fetch) || false
123
- if force_fetch || !File.exist?(file)
124
- data = fetch(uri, options)
125
- File.open(file, "w") { |f| f.write(data) }
126
- else
127
- remove_fetch_options(options)
128
- end
129
- # Why always parse the file? See Note 001, below.
130
- parse_csv_from_file(file, options)
131
- end
132
-
133
- # == HTML ==
134
-
135
- def self.parse_html_from_file(file)
136
- File.open(file) do |f|
137
- Nokogiri::HTML::Document.parse(f)
138
- end
139
- end
140
-
141
- def self.parse_html_from_uri(uri, options={})
142
- data = fetch(uri, options)
143
- Nokogiri::HTML::Document.parse(data)
144
- end
145
-
146
- def self.parse_html_from_file_or_uri(file, uri, options={})
147
- force_fetch = options.delete(:force_fetch) || false
148
- if force_fetch || !File.exist?(file)
149
- data = fetch(uri, options)
150
- File.open(file, "w") { |f| f.write(data) }
151
- else
152
- remove_fetch_options(options)
153
- end
154
- # Why always parse the file? See Note 001, below.
155
- parse_html_from_file(file)
156
- end
157
-
158
- # == JSON
159
-
160
- def self.parse_json_from_file(file)
104
+
105
+ # == Parsing ===
106
+
107
+ def self.parse_file(format, file, options={})
161
108
  File.open(file) do |f|
162
- JSON.parse(f.read)
109
+ case format
110
+ when :csv
111
+ extra_header_rows = options.delete(:extra_header_rows) || 0
112
+ extra_header_rows.times { f.gets } # ignore these rows
113
+ FasterCSV.parse(f, options)
114
+ when :xml
115
+ Nokogiri::XML::Document.parse(f)
116
+ when :json
117
+ JSON.parse(f.read)
118
+ when :html
119
+ Nokogiri::HTML::Document.parse(f)
120
+ else
121
+ raise "Unexpected format : #{format.inspect}"
122
+ end
163
123
  end
164
124
  end
165
-
166
- def self.parse_json_from_uri(uri, options={})
125
+
126
+ def self.parse_uri(format, uri, options={})
167
127
  data = fetch(uri, options)
168
- JSON.parse(data)
169
- end
170
-
171
- def self.parse_json_from_file_or_uri(file, uri, options={})
172
- force_fetch = options.delete(:force_fetch) || false
173
- if force_fetch || !File.exist?(file)
174
- data = fetch(uri, options)
175
- File.open(file, "w") { |f| f.write(data) }
128
+ case format
129
+ when :csv
130
+ # TODO: support extra_header_rows option
131
+ FasterCSV.parse(data, options)
132
+ when :xml
133
+ Nokogiri::XML::Document.parse(data)
134
+ when :json
135
+ JSON.parse(data)
136
+ when :html
137
+ Nokogiri::HTML::Document.parse(data)
176
138
  else
177
- remove_fetch_options(options)
139
+ raise "Unexpected format : #{format.inspect}"
178
140
  end
179
- # Why always parse the file? See Note 001, below.
180
- parse_json_from_file(file)
181
141
  end
182
-
183
- # == XML
184
-
185
- def self.parse_xml_from_file(file)
186
- File.open(file) do |f|
187
- Nokogiri::XML::Document.parse(f)
188
- end
189
- end
190
-
191
- def self.parse_xml_from_uri(uri, options={})
192
- data = fetch(uri, options)
193
- Nokogiri::XML::Document.parse(data)
194
- end
195
-
196
- def self.parse_xml_from_file_or_uri(file, uri, options={})
142
+
143
+ def self.parse_file_or_uri(format, file, uri, options={})
197
144
  force_fetch = options.delete(:force_fetch) || false
198
145
  if force_fetch || !File.exist?(file)
199
146
  data = fetch(uri, options)
@@ -202,7 +149,7 @@ module DataCatalog
202
149
  remove_fetch_options(options)
203
150
  end
204
151
  # Why always parse the file? See Note 001, below.
205
- parse_xml_from_file(file)
152
+ parse_file(format, file, options)
206
153
  end
207
154
 
208
155
  # == YAML
data/spec/utility_spec.rb CHANGED
@@ -106,28 +106,28 @@ describe "Utility" do
106
106
  end
107
107
 
108
108
  describe "csv" do
109
- describe "parse_csv_from_file" do
109
+ describe "parse_file" do
110
110
  it "should work" do
111
111
  file = File.dirname(__FILE__) + '/test.csv'
112
- result = U.parse_csv_from_file(file)
112
+ result = U.parse_file(:csv, file)
113
113
  result.should == [["Metro Center", "Dupont Circle"]]
114
114
  end
115
115
  end
116
116
 
117
- describe "parse_csv_from_uri" do
117
+ describe "parse_uri" do
118
118
  it "should work" do
119
119
  readable = Object.new
120
120
  readable.stub(:read).and_return(%(Metro Center,Dupont Circle))
121
121
  U.stub(:open).and_return(readable)
122
- result = U.parse_csv_from_uri("fake", :quiet => true)
122
+ result = U.parse_uri(:csv, "fake", :quiet => true)
123
123
  result.should == [["Metro Center", "Dupont Circle"]]
124
124
  end
125
125
  end
126
126
 
127
- describe "parse_csv_from_file_or_uri" do
127
+ describe "parse_file_or_uri" do
128
128
  it "should work when file present" do
129
129
  file = File.dirname(__FILE__) + '/test.csv'
130
- result = U.parse_csv_from_file_or_uri(file, "fake", :quiet => true)
130
+ result = U.parse_file_or_uri(:csv, file, "fake", :quiet => true)
131
131
  result.should == [["Metro Center", "Dupont Circle"]]
132
132
  end
133
133
 
@@ -137,7 +137,7 @@ describe "Utility" do
137
137
  U.stub(:open).and_return(readable)
138
138
  file = File.dirname(__FILE__) + "/missing.csv"
139
139
  FileUtils.rm(file) if File.exists?(file)
140
- result = U.parse_csv_from_file_or_uri(file, "fake", :quiet => true)
140
+ result = U.parse_file_or_uri(:csv, file, "fake", :quiet => true)
141
141
  FileUtils.rm(file) if File.exists?(file)
142
142
  result.should == [["Metro Center", "Dupont Circle"]]
143
143
  end
@@ -145,16 +145,16 @@ describe "Utility" do
145
145
  end
146
146
 
147
147
  describe "html" do
148
- describe "parse_html_from_file" do
148
+ describe "parse_file" do
149
149
  it "should work" do
150
150
  file = File.dirname(__FILE__) + '/test.html'
151
- parsed = U.parse_html_from_file(file)
151
+ parsed = U.parse_file(:html, file)
152
152
  result = parsed.css('li').map(&:content)
153
153
  result.should == ["Metro Center", "Dupont Circle"]
154
154
  end
155
155
  end
156
156
 
157
- describe "parse_html_from_uri" do
157
+ describe "parse_uri" do
158
158
  it "should work" do
159
159
  readable = Object.new
160
160
  readable.stub(:read).and_return(%(
@@ -168,16 +168,16 @@ describe "Utility" do
168
168
  </html>
169
169
  ))
170
170
  U.stub(:open).and_return(readable)
171
- parsed = U.parse_html_from_uri("fake", :quiet => true)
171
+ parsed = U.parse_uri(:html, "fake", :quiet => true)
172
172
  result = parsed.css('li').map(&:content)
173
173
  result.should == ["Metro Center", "Dupont Circle"]
174
174
  end
175
175
  end
176
176
 
177
- describe "parse_html_from_file_or_uri" do
177
+ describe "parse_file_or_uri" do
178
178
  it "should work when file present" do
179
179
  file = File.dirname(__FILE__) + '/test.html'
180
- parsed = U.parse_html_from_file_or_uri(file, "fake", :quiet => true)
180
+ parsed = U.parse_file_or_uri(:html, file, "fake", :quiet => true)
181
181
  result = parsed.css('li').map(&:content)
182
182
  result.should == ["Metro Center", "Dupont Circle"]
183
183
  end
@@ -193,7 +193,7 @@ describe "Utility" do
193
193
  U.stub(:open).and_return(readable)
194
194
  file = File.dirname(__FILE__) + "/missing.html"
195
195
  FileUtils.rm(file) if File.exists?(file)
196
- parsed = U.parse_html_from_file_or_uri(file, "fake", :quiet => true)
196
+ parsed = U.parse_file_or_uri(:html, file, "fake", :quiet => true)
197
197
  FileUtils.rm(file) if File.exists?(file)
198
198
  result = parsed.xpath('.//stations/station').map(&:content)
199
199
  result.should == ["Metro Center", "Dupont Circle"]
@@ -202,29 +202,29 @@ describe "Utility" do
202
202
  end
203
203
 
204
204
  describe "json" do
205
- describe "parse_json_from_file" do
205
+ describe "parse_file" do
206
206
  it "should work" do
207
207
  file = File.dirname(__FILE__) + '/test.json'
208
- U.parse_json_from_file(file).should ==
208
+ U.parse_file(:json, file).should ==
209
209
  { "stations" => ["Metro Center", "Dupont Circle"] }
210
210
  end
211
211
  end
212
212
 
213
- describe "parse_json_from_uri" do
213
+ describe "parse_uri" do
214
214
  it "should work" do
215
215
  readable = Object.new
216
216
  readable.stub(:read).and_return(
217
217
  %({"stations":["Metro Center","Dupont Circle"]}))
218
218
  U.stub(:open).and_return(readable)
219
- result = U.parse_json_from_uri("fake", :quiet => true)
219
+ result = U.parse_uri(:json, "fake", :quiet => true)
220
220
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
221
221
  end
222
222
  end
223
223
 
224
- describe "parse_json_from_file_or_uri" do
224
+ describe "parse_file_or_uri" do
225
225
  it "should work when file present" do
226
226
  file = File.dirname(__FILE__) + '/test.json'
227
- result = U.parse_json_from_file_or_uri(file, "fake", :quiet => true)
227
+ result = U.parse_file_or_uri(:json, file, "fake", :quiet => true)
228
228
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
229
229
  end
230
230
 
@@ -235,7 +235,7 @@ describe "Utility" do
235
235
  U.stub(:open).and_return(readable)
236
236
  file = File.dirname(__FILE__) + "/missing.json"
237
237
  FileUtils.rm(file) if File.exists?(file)
238
- result = U.parse_json_from_file_or_uri(file, "fake", :quiet => true)
238
+ result = U.parse_file_or_uri(:json, file, "fake", :quiet => true)
239
239
  FileUtils.rm(file) if File.exists?(file)
240
240
  result.should == { "stations" => ["Metro Center", "Dupont Circle"] }
241
241
  end
@@ -243,16 +243,16 @@ describe "Utility" do
243
243
  end
244
244
 
245
245
  describe "xml" do
246
- describe "parse_xml_from_file" do
246
+ describe "parse_file" do
247
247
  it "should work" do
248
248
  file = File.dirname(__FILE__) + '/test.xml'
249
- parsed = U.parse_xml_from_file(file)
249
+ parsed = U.parse_file(:xml, file)
250
250
  result = parsed.xpath('.//stations/station').map(&:content)
251
251
  result.should == ["Metro Center", "Dupont Circle"]
252
252
  end
253
253
  end
254
254
 
255
- describe "parse_xml_from_uri" do
255
+ describe "parse_uri" do
256
256
  it "should work" do
257
257
  readable = Object.new
258
258
  readable.stub(:read).and_return(%(
@@ -262,16 +262,16 @@ describe "Utility" do
262
262
  </stations>
263
263
  ))
264
264
  U.stub(:open).and_return(readable)
265
- parsed = U.parse_xml_from_uri("fake", :quiet => true)
265
+ parsed = U.parse_uri(:xml, "fake", :quiet => true)
266
266
  result = parsed.xpath('.//stations/station').map(&:content)
267
267
  result.should == ["Metro Center", "Dupont Circle"]
268
268
  end
269
269
  end
270
270
 
271
- describe "parse_xml_from_file_or_uri" do
271
+ describe "parse_file_or_uri" do
272
272
  it "should work when file present" do
273
273
  file = File.dirname(__FILE__) + '/test.xml'
274
- parsed = U.parse_xml_from_file_or_uri(file, "fake", :quiet => true)
274
+ parsed = U.parse_file_or_uri(:xml, file, "fake", :quiet => true)
275
275
  result = parsed.xpath('.//stations/station').map(&:content)
276
276
  result.should == ["Metro Center", "Dupont Circle"]
277
277
  end
@@ -287,7 +287,7 @@ describe "Utility" do
287
287
  U.stub(:open).and_return(readable)
288
288
  file = File.dirname(__FILE__) + "/missing.xml"
289
289
  FileUtils.rm(file) if File.exists?(file)
290
- parsed = U.parse_xml_from_file_or_uri(file, "fake", :quiet => true)
290
+ parsed = U.parse_file_or_uri(:xml, file, "fake", :quiet => true)
291
291
  FileUtils.rm(file) if File.exists?(file)
292
292
  result = parsed.xpath('.//stations/station').map(&:content)
293
293
  result.should == ["Metro Center", "Dupont Circle"]
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacatalog-importer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 2
9
8
  - 3
10
- version: 0.2.3
9
+ - 0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - David James