datacatalog-importer 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -12,7 +12,7 @@ begin
12
12
  gem.authors = ["David James"]
13
13
  gem.add_dependency "nokogiri", ">= 1.4.2"
14
14
  gem.add_dependency "datacatalog", ">= 0.4.15"
15
- gem.add_development_dependency "rspec", ">= 1.2.9"
15
+ gem.add_development_dependency "rspec", ">= 1.3.0"
16
16
  # gem is a Gem::Specification...
17
17
  # see http://www.rubygems.org/read/chapter/20 for additional settings
18
18
  end
@@ -31,11 +31,11 @@ end
31
31
  # rdoc.rdoc_files.include('lib/**/*.rb')
32
32
  # end
33
33
  #
34
- # require 'spec/rake/spectask'
35
- # Spec::Rake::SpecTask.new(:spec) do |spec|
36
- # spec.libs << 'lib' << 'spec'
37
- # spec.spec_files = FileList['spec/**/*_spec.rb']
38
- # end
34
+ require 'spec/rake/spectask'
35
+ Spec::Rake::SpecTask.new(:spec) do |spec|
36
+ spec.libs << 'lib' << 'spec'
37
+ spec.spec_files = FileList['spec/**/*_spec.rb']
38
+ end
39
39
  #
40
40
  # Spec::Rake::SpecTask.new(:rcov) do |spec|
41
41
  # spec.libs << 'lib' << 'spec'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{datacatalog-importer}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David James"]
@@ -40,6 +40,8 @@ Gem::Specification.new do |s|
40
40
  "natdat_is_hungry.md",
41
41
  "spec/spec.opts",
42
42
  "spec/spec_helper.rb",
43
+ "spec/test.json",
44
+ "spec/test.xml",
43
45
  "spec/utility_spec.rb"
44
46
  ]
45
47
  s.homepage = %q{http://github.com/sunlightlabs/datacatalog-importer}
@@ -59,16 +61,16 @@ Gem::Specification.new do |s|
59
61
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
60
62
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.2"])
61
63
  s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.15"])
62
- s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
64
+ s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
63
65
  else
64
66
  s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
65
67
  s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
66
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
68
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
67
69
  end
68
70
  else
69
71
  s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
70
72
  s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
71
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
73
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
72
74
  end
73
75
  end
74
76
 
@@ -1,4 +1,5 @@
1
1
  require 'fastercsv'
2
+ require 'json'
2
3
  require 'nokogiri'
3
4
  require 'open-uri'
4
5
 
@@ -11,7 +12,7 @@ module DataCatalog
11
12
  def self.absolute_url(base_url, url)
12
13
  plain_string(URI.parse(base_url).merge(url).to_s)
13
14
  end
14
-
15
+
15
16
  def self.normalize_url(url)
16
17
  uri = URI.parse(url).normalize
17
18
  unless uri.scheme
@@ -56,7 +57,7 @@ module DataCatalog
56
57
 
57
58
  def self.headers
58
59
  {
59
- "UserAgent" => "National Data Catalog Importer/0.2.1",
60
+ "UserAgent" => "National Data Catalog Importer/0.2.2",
60
61
  }
61
62
  end
62
63
 
@@ -107,14 +108,14 @@ module DataCatalog
107
108
  end
108
109
 
109
110
  def self.parse_csv_from_uri(uri, options={})
110
- data = fetch(uri)
111
+ data = fetch(uri, options)
111
112
  FasterCSV.parse(data, options)
112
113
  end
113
114
 
114
115
  def self.parse_csv_from_file_or_uri(uri, file, options={})
115
116
  force_fetch = options.delete(:force_fetch) || false
116
117
  if force_fetch || !File.exist?(file)
117
- document = fetch(uri)
118
+ document = fetch(uri, options)
118
119
  File.open(file, "w") { |f| f.write(document) }
119
120
  end
120
121
  parse_csv_from_file(file, options)
@@ -128,8 +129,8 @@ module DataCatalog
128
129
  end
129
130
  end
130
131
 
131
- def self.parse_html_from_uri(uri)
132
- data = fetch(uri)
132
+ def self.parse_html_from_uri(uri, options={})
133
+ data = fetch(uri, options)
133
134
  Nokogiri::HTML::Document.parse(data)
134
135
  end
135
136
 
@@ -138,7 +139,30 @@ module DataCatalog
138
139
  document = parse_html_from_uri(uri)
139
140
  File.open(file, "w") { |f| f.write(document) }
140
141
  end
141
- parse_html_from_file(file) # Why always parse the file? See Note 001, below.
142
+ # Why always parse the file? See Note 001, below.
143
+ parse_html_from_file(file)
144
+ end
145
+
146
+ # == JSON
147
+
148
+ def self.parse_json_from_file(filename)
149
+ File.open(filename) do |f|
150
+ JSON.parse(f.read)
151
+ end
152
+ end
153
+
154
+ def self.parse_json_from_uri(uri, options={})
155
+ data = fetch(uri, options)
156
+ JSON.parse(data)
157
+ end
158
+
159
+ def self.parse_json_from_file_or_uri(uri, file, options={})
160
+ if options[:force_fetch] || !File.exist?(file)
161
+ document = parse_json_from_uri(uri)
162
+ File.open(file, "w") { |f| f.write(document) }
163
+ end
164
+ # Why always parse the file? See Note 001, below.
165
+ parse_json_from_file(file)
142
166
  end
143
167
 
144
168
  # == XML
@@ -149,8 +173,8 @@ module DataCatalog
149
173
  end
150
174
  end
151
175
 
152
- def self.parse_xml_from_uri(uri)
153
- data = fetch(uri)
176
+ def self.parse_xml_from_uri(uri, options={})
177
+ data = fetch(uri, options)
154
178
  Nokogiri::XML::Document.parse(data)
155
179
  end
156
180
 
@@ -159,7 +183,8 @@ module DataCatalog
159
183
  document = parse_xml_from_uri(uri)
160
184
  File.open(file, "w") { |f| f.write(document) }
161
185
  end
162
- parse_xml_from_file(file) # Why always parse the file? See Note 001, below.
186
+ # Why always parse the file? See Note 001, below.
187
+ parse_xml_from_file(file)
163
188
  end
164
189
 
165
190
  # == YAML
@@ -0,0 +1 @@
1
+ { "stations" : ["Metro Center","Dupont Circle"] }
@@ -0,0 +1,4 @@
1
+ <stations>
2
+ <station>Metro Center</station>
3
+ <station>Dupont Circle</station>
4
+ </stations>
@@ -20,14 +20,14 @@ describe "Utility" do
20
20
  "http://sunlightlabs.com/"
21
21
  end
22
22
  end
23
-
23
+
24
24
  describe "absolute_url" do
25
25
  it "should work" do
26
26
  U.absolute_url("http://sunlightlabs.com", "/contact").should ==
27
27
  "http://sunlightlabs.com/contact"
28
28
  end
29
29
  end
30
-
30
+
31
31
  describe "single_line_clean" do
32
32
  it "should clean up leading and trailing whitespace" do
33
33
  U.single_line_clean("\t \ttext\t\t ").should == "text"
@@ -41,19 +41,18 @@ describe "Utility" do
41
41
  U.single_line_clean("sunlight\nlabs").should == "sunlight labs"
42
42
  end
43
43
  end
44
-
44
+
45
45
  describe "multi_line_clean" do
46
46
  it "should remove leading and trailing newlines" do
47
47
  input = "\nline 1\nline 2\nline 3\n"
48
48
  U.multi_line_clean(input).should == "line 1\nline 2\nline 3"
49
49
  end
50
50
  end
51
-
51
+
52
52
  describe "fetch" do
53
53
  before do
54
54
  @readable = Object.new
55
55
  @readable.stub(:read).and_return("result")
56
-
57
56
  @sleep_count = 0
58
57
  U.stub(:sleep).and_return {
59
58
  @sleep_count += 1
@@ -64,7 +63,7 @@ describe "Utility" do
64
63
  U.stub(:open).and_return(@readable)
65
64
  U.fetch("fake", :quiet => true).should == "result"
66
65
  end
67
-
66
+
68
67
  it "bad fetches below retry limit are ok" do
69
68
  @count = 0
70
69
  U.stub(:open).and_return {
@@ -75,8 +74,8 @@ describe "Utility" do
75
74
  @readable
76
75
  end
77
76
  }
78
- U.fetch("fake", :max_attempts => 3, :quiet => true).should == "result"
79
- @sleep_count.should == 2
77
+ result = U.fetch("fake", :max_attempts => 3, :quiet => true)
78
+ result.should == "result" && @sleep_count.should == 2
80
79
  end
81
80
 
82
81
  it "bad fetches above retry limit give nil" do
@@ -89,10 +88,9 @@ describe "Utility" do
89
88
  @readable
90
89
  end
91
90
  }
92
- U.fetch("fake", :max_attempts => 2, :quiet => true).should == nil
93
- @sleep_count.should == 1
91
+ result = U.fetch("fake", :max_attempts => 2, :quiet => true)
92
+ result.should == nil && @sleep_count.should == 1
94
93
  end
95
-
96
94
  end
97
95
 
98
96
  describe "standardize_name" do
@@ -100,11 +98,65 @@ describe "Utility" do
100
98
  U.standardize_name("City Administrator, Office of").should ==
101
99
  "Office of City Administrator"
102
100
  end
103
-
101
+
104
102
  it "two commas" do
105
103
  U.standardize_name("Children, Youth & Families, Department of").should ==
106
104
  "Department of Children, Youth & Families"
107
105
  end
108
106
  end
109
-
107
+
108
+ # == XML
109
+
110
+ describe "parse_xml_from_file" do
111
+ it "should work" do
112
+ file = File.dirname(__FILE__) + '/test.xml'
113
+ parsed = U.parse_xml_from_file(file)
114
+ result = parsed.xpath('.//stations/station').map(&:content)
115
+ result.should == ["Metro Center", "Dupont Circle"]
116
+ end
117
+ end
118
+
119
+ describe "parse_xml_from_uri" do
120
+ before do
121
+ @readable = Object.new
122
+ @readable.stub(:read).and_return(%(
123
+ <stations>
124
+ <station>Metro Center</station>
125
+ <station>Dupont Circle</station>
126
+ </stations>
127
+ ))
128
+ end
129
+
130
+ it "should work" do
131
+ U.stub(:open).and_return(@readable)
132
+ parsed = U.parse_xml_from_uri("fake", :quiet => true)
133
+ result = parsed.xpath('.//stations/station').map(&:content)
134
+ result.should == ["Metro Center", "Dupont Circle"]
135
+ end
136
+ end
137
+
138
+ # == JSON
139
+
140
+ describe "parse_json_from_file" do
141
+ it "should work" do
142
+ file = File.dirname(__FILE__) + '/test.json'
143
+ U.parse_json_from_file(file).should ==
144
+ { "stations" => ["Metro Center","Dupont Circle"] }
145
+ end
146
+ end
147
+
148
+ describe "parse_json_from_uri" do
149
+ before do
150
+ @readable = Object.new
151
+ @readable.stub(:read).and_return(
152
+ %({"stations":["Metro Center","Dupont Circle"]}))
153
+ end
154
+
155
+ it "should work" do
156
+ U.stub(:open).and_return(@readable)
157
+ result = U.parse_json_from_uri("fake", :quiet => true)
158
+ result.should == { "stations" => ["Metro Center","Dupont Circle"] }
159
+ end
160
+ end
161
+
110
162
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacatalog-importer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - David James
@@ -58,12 +58,12 @@ dependencies:
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
- hash: 13
61
+ hash: 27
62
62
  segments:
63
63
  - 1
64
- - 2
65
- - 9
66
- version: 1.2.9
64
+ - 3
65
+ - 0
66
+ version: 1.3.0
67
67
  type: :development
68
68
  version_requirements: *id003
69
69
  description: This framework makes it easier to write importers for the National Data Catalog.
@@ -99,6 +99,8 @@ files:
99
99
  - natdat_is_hungry.md
100
100
  - spec/spec.opts
101
101
  - spec/spec_helper.rb
102
+ - spec/test.json
103
+ - spec/test.xml
102
104
  - spec/utility_spec.rb
103
105
  has_rdoc: true
104
106
  homepage: http://github.com/sunlightlabs/datacatalog-importer