datacatalog-importer 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -12,7 +12,7 @@ begin
12
12
  gem.authors = ["David James"]
13
13
  gem.add_dependency "nokogiri", ">= 1.4.2"
14
14
  gem.add_dependency "datacatalog", ">= 0.4.15"
15
- gem.add_development_dependency "rspec", ">= 1.2.9"
15
+ gem.add_development_dependency "rspec", ">= 1.3.0"
16
16
  # gem is a Gem::Specification...
17
17
  # see http://www.rubygems.org/read/chapter/20 for additional settings
18
18
  end
@@ -31,11 +31,11 @@ end
31
31
  # rdoc.rdoc_files.include('lib/**/*.rb')
32
32
  # end
33
33
  #
34
- # require 'spec/rake/spectask'
35
- # Spec::Rake::SpecTask.new(:spec) do |spec|
36
- # spec.libs << 'lib' << 'spec'
37
- # spec.spec_files = FileList['spec/**/*_spec.rb']
38
- # end
34
+ require 'spec/rake/spectask'
35
+ Spec::Rake::SpecTask.new(:spec) do |spec|
36
+ spec.libs << 'lib' << 'spec'
37
+ spec.spec_files = FileList['spec/**/*_spec.rb']
38
+ end
39
39
  #
40
40
  # Spec::Rake::SpecTask.new(:rcov) do |spec|
41
41
  # spec.libs << 'lib' << 'spec'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{datacatalog-importer}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David James"]
@@ -40,6 +40,8 @@ Gem::Specification.new do |s|
40
40
  "natdat_is_hungry.md",
41
41
  "spec/spec.opts",
42
42
  "spec/spec_helper.rb",
43
+ "spec/test.json",
44
+ "spec/test.xml",
43
45
  "spec/utility_spec.rb"
44
46
  ]
45
47
  s.homepage = %q{http://github.com/sunlightlabs/datacatalog-importer}
@@ -59,16 +61,16 @@ Gem::Specification.new do |s|
59
61
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
60
62
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.2"])
61
63
  s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.15"])
62
- s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
64
+ s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
63
65
  else
64
66
  s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
65
67
  s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
66
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
68
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
67
69
  end
68
70
  else
69
71
  s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
70
72
  s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
71
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
73
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
72
74
  end
73
75
  end
74
76
 
@@ -1,4 +1,5 @@
1
1
  require 'fastercsv'
2
+ require 'json'
2
3
  require 'nokogiri'
3
4
  require 'open-uri'
4
5
 
@@ -11,7 +12,7 @@ module DataCatalog
11
12
  def self.absolute_url(base_url, url)
12
13
  plain_string(URI.parse(base_url).merge(url).to_s)
13
14
  end
14
-
15
+
15
16
  def self.normalize_url(url)
16
17
  uri = URI.parse(url).normalize
17
18
  unless uri.scheme
@@ -56,7 +57,7 @@ module DataCatalog
56
57
 
57
58
  def self.headers
58
59
  {
59
- "UserAgent" => "National Data Catalog Importer/0.2.1",
60
+ "UserAgent" => "National Data Catalog Importer/0.2.2",
60
61
  }
61
62
  end
62
63
 
@@ -107,14 +108,14 @@ module DataCatalog
107
108
  end
108
109
 
109
110
  def self.parse_csv_from_uri(uri, options={})
110
- data = fetch(uri)
111
+ data = fetch(uri, options)
111
112
  FasterCSV.parse(data, options)
112
113
  end
113
114
 
114
115
  def self.parse_csv_from_file_or_uri(uri, file, options={})
115
116
  force_fetch = options.delete(:force_fetch) || false
116
117
  if force_fetch || !File.exist?(file)
117
- document = fetch(uri)
118
+ document = fetch(uri, options)
118
119
  File.open(file, "w") { |f| f.write(document) }
119
120
  end
120
121
  parse_csv_from_file(file, options)
@@ -128,8 +129,8 @@ module DataCatalog
128
129
  end
129
130
  end
130
131
 
131
- def self.parse_html_from_uri(uri)
132
- data = fetch(uri)
132
+ def self.parse_html_from_uri(uri, options={})
133
+ data = fetch(uri, options)
133
134
  Nokogiri::HTML::Document.parse(data)
134
135
  end
135
136
 
@@ -138,7 +139,30 @@ module DataCatalog
138
139
  document = parse_html_from_uri(uri)
139
140
  File.open(file, "w") { |f| f.write(document) }
140
141
  end
141
- parse_html_from_file(file) # Why always parse the file? See Note 001, below.
142
+ # Why always parse the file? See Note 001, below.
143
+ parse_html_from_file(file)
144
+ end
145
+
146
+ # == JSON
147
+
148
+ def self.parse_json_from_file(filename)
149
+ File.open(filename) do |f|
150
+ JSON.parse(f.read)
151
+ end
152
+ end
153
+
154
+ def self.parse_json_from_uri(uri, options={})
155
+ data = fetch(uri, options)
156
+ JSON.parse(data)
157
+ end
158
+
159
+ def self.parse_json_from_file_or_uri(uri, file, options={})
160
+ if options[:force_fetch] || !File.exist?(file)
161
+ document = parse_json_from_uri(uri)
162
+ File.open(file, "w") { |f| f.write(document) }
163
+ end
164
+ # Why always parse the file? See Note 001, below.
165
+ parse_json_from_file(file)
142
166
  end
143
167
 
144
168
  # == XML
@@ -149,8 +173,8 @@ module DataCatalog
149
173
  end
150
174
  end
151
175
 
152
- def self.parse_xml_from_uri(uri)
153
- data = fetch(uri)
176
+ def self.parse_xml_from_uri(uri, options={})
177
+ data = fetch(uri, options)
154
178
  Nokogiri::XML::Document.parse(data)
155
179
  end
156
180
 
@@ -159,7 +183,8 @@ module DataCatalog
159
183
  document = parse_xml_from_uri(uri)
160
184
  File.open(file, "w") { |f| f.write(document) }
161
185
  end
162
- parse_xml_from_file(file) # Why always parse the file? See Note 001, below.
186
+ # Why always parse the file? See Note 001, below.
187
+ parse_xml_from_file(file)
163
188
  end
164
189
 
165
190
  # == YAML
@@ -0,0 +1 @@
1
+ { "stations" : ["Metro Center","Dupont Circle"] }
@@ -0,0 +1,4 @@
1
+ <stations>
2
+ <station>Metro Center</station>
3
+ <station>Dupont Circle</station>
4
+ </stations>
@@ -20,14 +20,14 @@ describe "Utility" do
20
20
  "http://sunlightlabs.com/"
21
21
  end
22
22
  end
23
-
23
+
24
24
  describe "absolute_url" do
25
25
  it "should work" do
26
26
  U.absolute_url("http://sunlightlabs.com", "/contact").should ==
27
27
  "http://sunlightlabs.com/contact"
28
28
  end
29
29
  end
30
-
30
+
31
31
  describe "single_line_clean" do
32
32
  it "should clean up leading and trailing whitespace" do
33
33
  U.single_line_clean("\t \ttext\t\t ").should == "text"
@@ -41,19 +41,18 @@ describe "Utility" do
41
41
  U.single_line_clean("sunlight\nlabs").should == "sunlight labs"
42
42
  end
43
43
  end
44
-
44
+
45
45
  describe "multi_line_clean" do
46
46
  it "should remove leading and trailing newlines" do
47
47
  input = "\nline 1\nline 2\nline 3\n"
48
48
  U.multi_line_clean(input).should == "line 1\nline 2\nline 3"
49
49
  end
50
50
  end
51
-
51
+
52
52
  describe "fetch" do
53
53
  before do
54
54
  @readable = Object.new
55
55
  @readable.stub(:read).and_return("result")
56
-
57
56
  @sleep_count = 0
58
57
  U.stub(:sleep).and_return {
59
58
  @sleep_count += 1
@@ -64,7 +63,7 @@ describe "Utility" do
64
63
  U.stub(:open).and_return(@readable)
65
64
  U.fetch("fake", :quiet => true).should == "result"
66
65
  end
67
-
66
+
68
67
  it "bad fetches below retry limit are ok" do
69
68
  @count = 0
70
69
  U.stub(:open).and_return {
@@ -75,8 +74,8 @@ describe "Utility" do
75
74
  @readable
76
75
  end
77
76
  }
78
- U.fetch("fake", :max_attempts => 3, :quiet => true).should == "result"
79
- @sleep_count.should == 2
77
+ result = U.fetch("fake", :max_attempts => 3, :quiet => true)
78
+ result.should == "result" && @sleep_count.should == 2
80
79
  end
81
80
 
82
81
  it "bad fetches above retry limit give nil" do
@@ -89,10 +88,9 @@ describe "Utility" do
89
88
  @readable
90
89
  end
91
90
  }
92
- U.fetch("fake", :max_attempts => 2, :quiet => true).should == nil
93
- @sleep_count.should == 1
91
+ result = U.fetch("fake", :max_attempts => 2, :quiet => true)
92
+ result.should == nil && @sleep_count.should == 1
94
93
  end
95
-
96
94
  end
97
95
 
98
96
  describe "standardize_name" do
@@ -100,11 +98,65 @@ describe "Utility" do
100
98
  U.standardize_name("City Administrator, Office of").should ==
101
99
  "Office of City Administrator"
102
100
  end
103
-
101
+
104
102
  it "two commas" do
105
103
  U.standardize_name("Children, Youth & Families, Department of").should ==
106
104
  "Department of Children, Youth & Families"
107
105
  end
108
106
  end
109
-
107
+
108
+ # == XML
109
+
110
+ describe "parse_xml_from_file" do
111
+ it "should work" do
112
+ file = File.dirname(__FILE__) + '/test.xml'
113
+ parsed = U.parse_xml_from_file(file)
114
+ result = parsed.xpath('.//stations/station').map(&:content)
115
+ result.should == ["Metro Center", "Dupont Circle"]
116
+ end
117
+ end
118
+
119
+ describe "parse_xml_from_uri" do
120
+ before do
121
+ @readable = Object.new
122
+ @readable.stub(:read).and_return(%(
123
+ <stations>
124
+ <station>Metro Center</station>
125
+ <station>Dupont Circle</station>
126
+ </stations>
127
+ ))
128
+ end
129
+
130
+ it "should work" do
131
+ U.stub(:open).and_return(@readable)
132
+ parsed = U.parse_xml_from_uri("fake", :quiet => true)
133
+ result = parsed.xpath('.//stations/station').map(&:content)
134
+ result.should == ["Metro Center", "Dupont Circle"]
135
+ end
136
+ end
137
+
138
+ # == JSON
139
+
140
+ describe "parse_json_from_file" do
141
+ it "should work" do
142
+ file = File.dirname(__FILE__) + '/test.json'
143
+ U.parse_json_from_file(file).should ==
144
+ { "stations" => ["Metro Center","Dupont Circle"] }
145
+ end
146
+ end
147
+
148
+ describe "parse_json_from_uri" do
149
+ before do
150
+ @readable = Object.new
151
+ @readable.stub(:read).and_return(
152
+ %({"stations":["Metro Center","Dupont Circle"]}))
153
+ end
154
+
155
+ it "should work" do
156
+ U.stub(:open).and_return(@readable)
157
+ result = U.parse_json_from_uri("fake", :quiet => true)
158
+ result.should == { "stations" => ["Metro Center","Dupont Circle"] }
159
+ end
160
+ end
161
+
110
162
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacatalog-importer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - David James
@@ -58,12 +58,12 @@ dependencies:
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
- hash: 13
61
+ hash: 27
62
62
  segments:
63
63
  - 1
64
- - 2
65
- - 9
66
- version: 1.2.9
64
+ - 3
65
+ - 0
66
+ version: 1.3.0
67
67
  type: :development
68
68
  version_requirements: *id003
69
69
  description: This framework makes it easier to write importers for the National Data Catalog.
@@ -99,6 +99,8 @@ files:
99
99
  - natdat_is_hungry.md
100
100
  - spec/spec.opts
101
101
  - spec/spec_helper.rb
102
+ - spec/test.json
103
+ - spec/test.xml
102
104
  - spec/utility_spec.rb
103
105
  has_rdoc: true
104
106
  homepage: http://github.com/sunlightlabs/datacatalog-importer