datacatalog-importer 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +6 -6
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +6 -4
- data/lib/utility.rb +35 -10
- data/spec/test.json +1 -0
- data/spec/test.xml +4 -0
- data/spec/utility_spec.rb +65 -13
- metadata +9 -7
data/Rakefile
CHANGED
@@ -12,7 +12,7 @@ begin
     gem.authors = ["David James"]
     gem.add_dependency "nokogiri", ">= 1.4.2"
     gem.add_dependency "datacatalog", ">= 0.4.15"
-    gem.add_development_dependency "rspec", ">= 1.
+    gem.add_development_dependency "rspec", ">= 1.3.0"
     # gem is a Gem::Specification...
     # see http://www.rubygems.org/read/chapter/20 for additional settings
   end
@@ -31,11 +31,11 @@ end
 # rdoc.rdoc_files.include('lib/**/*.rb')
 # end
 #
-
-
-
-
-
+require 'spec/rake/spectask'
+Spec::Rake::SpecTask.new(:spec) do |spec|
+  spec.libs << 'lib' << 'spec'
+  spec.spec_files = FileList['spec/**/*_spec.rb']
+end
 #
 # Spec::Rake::SpecTask.new(:rcov) do |spec|
 #   spec.libs << 'lib' << 'spec'
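With the RSpec task now defined in the Rakefile rather than left commented out, the suite can be run with `rake spec`, which picks up every file matching spec/**/*_spec.rb. As a small illustrative aside (not part of this diff), jeweler-style Rakefiles commonly also point the default task at it:

    # Hypothetical addition, shown only to illustrate how the new :spec task is typically wired.
    task :default => :spec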
data/VERSION
CHANGED
@@ -1 +1 @@
-0.2.1
+0.2.2
data/datacatalog-importer.gemspec
CHANGED
@@ -5,7 +5,7 @@
 
 Gem::Specification.new do |s|
   s.name = %q{datacatalog-importer}
-  s.version = "0.2.1"
+  s.version = "0.2.2"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["David James"]
@@ -40,6 +40,8 @@ Gem::Specification.new do |s|
     "natdat_is_hungry.md",
     "spec/spec.opts",
     "spec/spec_helper.rb",
+    "spec/test.json",
+    "spec/test.xml",
     "spec/utility_spec.rb"
   ]
   s.homepage = %q{http://github.com/sunlightlabs/datacatalog-importer}
@@ -59,16 +61,16 @@ Gem::Specification.new do |s|
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
       s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.2"])
       s.add_runtime_dependency(%q<datacatalog>, [">= 0.4.15"])
-      s.add_development_dependency(%q<rspec>, [">= 1.
+      s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
     else
       s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
       s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
-      s.add_dependency(%q<rspec>, [">= 1.
+      s.add_dependency(%q<rspec>, [">= 1.3.0"])
     end
   else
     s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
     s.add_dependency(%q<datacatalog>, [">= 0.4.15"])
-    s.add_dependency(%q<rspec>, [">= 1.
+    s.add_dependency(%q<rspec>, [">= 1.3.0"])
   end
 end
 
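The development dependency on rspec is now pinned to ">= 1.3.0" in all three branches of the generated gemspec. For illustration only (standard RubyGems API, not code from this package), such a constraint is evaluated like this:

    require 'rubygems'

    requirement = Gem::Requirement.new(">= 1.3.0")
    requirement.satisfied_by?(Gem::Version.new("1.3.2"))  # => true
    requirement.satisfied_by?(Gem::Version.new("1.2.9"))  # => false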
data/lib/utility.rb
CHANGED
@@ -1,4 +1,5 @@
 require 'fastercsv'
+require 'json'
 require 'nokogiri'
 require 'open-uri'
 
@@ -11,7 +12,7 @@ module DataCatalog
     def self.absolute_url(base_url, url)
       plain_string(URI.parse(base_url).merge(url).to_s)
     end
-
+
     def self.normalize_url(url)
       uri = URI.parse(url).normalize
       unless uri.scheme
@@ -56,7 +57,7 @@ module DataCatalog
 
     def self.headers
       {
-        "UserAgent" => "National Data Catalog Importer/0.2.1",
+        "UserAgent" => "National Data Catalog Importer/0.2.2",
       }
     end
 
@@ -107,14 +108,14 @@ module DataCatalog
     end
 
     def self.parse_csv_from_uri(uri, options={})
-      data = fetch(uri)
+      data = fetch(uri, options)
       FasterCSV.parse(data, options)
     end
 
     def self.parse_csv_from_file_or_uri(uri, file, options={})
       force_fetch = options.delete(:force_fetch) || false
       if force_fetch || !File.exist?(file)
-        document = fetch(uri)
+        document = fetch(uri, options)
         File.open(file, "w") { |f| f.write(document) }
       end
       parse_csv_from_file(file, options)
@@ -128,8 +129,8 @@ module DataCatalog
       end
     end
 
-    def self.parse_html_from_uri(uri)
-      data = fetch(uri)
+    def self.parse_html_from_uri(uri, options={})
+      data = fetch(uri, options)
       Nokogiri::HTML::Document.parse(data)
     end
 
@@ -138,7 +139,30 @@ module DataCatalog
         document = parse_html_from_uri(uri)
         File.open(file, "w") { |f| f.write(document) }
       end
-
+      # Why always parse the file? See Note 001, below.
+      parse_html_from_file(file)
+    end
+
+    # == JSON
+
+    def self.parse_json_from_file(filename)
+      File.open(filename) do |f|
+        JSON.parse(f.read)
+      end
+    end
+
+    def self.parse_json_from_uri(uri, options={})
+      data = fetch(uri, options)
+      JSON.parse(data)
+    end
+
+    def self.parse_json_from_file_or_uri(uri, file, options={})
+      if options[:force_fetch] || !File.exist?(file)
+        document = parse_json_from_uri(uri)
+        File.open(file, "w") { |f| f.write(document) }
+      end
+      # Why always parse the file? See Note 001, below.
+      parse_json_from_file(file)
     end
 
     # == XML
@@ -149,8 +173,8 @@ module DataCatalog
       end
     end
 
-    def self.parse_xml_from_uri(uri)
-      data = fetch(uri)
+    def self.parse_xml_from_uri(uri, options={})
+      data = fetch(uri, options)
       Nokogiri::XML::Document.parse(data)
     end
 
@@ -159,7 +183,8 @@ module DataCatalog
         document = parse_xml_from_uri(uri)
         File.open(file, "w") { |f| f.write(document) }
       end
-
+      # Why always parse the file? See Note 001, below.
+      parse_xml_from_file(file)
     end
 
     # == YAML
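The new JSON helpers follow the pattern of the existing HTML and XML ones: parse_json_from_uri fetches and parses (forwarding options such as :quiet to fetch), while parse_json_from_file_or_uri writes the response to a cache file and then always parses that file. A minimal usage sketch in Ruby; the U shorthand mirrors the specs, and the URL and cache path are placeholders, not values from the package:

    # U is assumed to be an alias for the utility module shown above, as in the specs.
    fresh = U.parse_json_from_uri("http://example.org/stations.json", :quiet => true)
    fresh["stations"]   # => the parsed array from the response body

    # Cached variant: fetches only when the file is missing (or :force_fetch is set),
    # writes the response to disk, then parses the cached file on every call.
    cached = U.parse_json_from_file_or_uri(
      "http://example.org/stations.json",   # hypothetical remote source
      "cache/stations.json",                # hypothetical local cache file
      :force_fetch => false
    )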
data/spec/test.json
ADDED
@@ -0,0 +1 @@
+{ "stations" : ["Metro Center","Dupont Circle"] }
data/spec/test.xml
ADDED
data/spec/utility_spec.rb
CHANGED
@@ -20,14 +20,14 @@ describe "Utility" do
         "http://sunlightlabs.com/"
     end
   end
-
+
   describe "absolute_url" do
     it "should work" do
       U.absolute_url("http://sunlightlabs.com", "/contact").should ==
         "http://sunlightlabs.com/contact"
     end
   end
-
+
   describe "single_line_clean" do
     it "should clean up leading and trailing whitespace" do
       U.single_line_clean("\t \ttext\t\t ").should == "text"
@@ -41,19 +41,18 @@ describe "Utility" do
       U.single_line_clean("sunlight\nlabs").should == "sunlight labs"
     end
   end
-
+
   describe "multi_line_clean" do
     it "should remove leading and trailing newlines" do
       input = "\nline 1\nline 2\nline 3\n"
       U.multi_line_clean(input).should == "line 1\nline 2\nline 3"
     end
   end
-
+
   describe "fetch" do
     before do
       @readable = Object.new
       @readable.stub(:read).and_return("result")
-
      @sleep_count = 0
      U.stub(:sleep).and_return {
        @sleep_count += 1
@@ -64,7 +63,7 @@ describe "Utility" do
      U.stub(:open).and_return(@readable)
      U.fetch("fake", :quiet => true).should == "result"
    end
-
+
    it "bad fetches below retry limit are ok" do
      @count = 0
      U.stub(:open).and_return {
@@ -75,8 +74,8 @@ describe "Utility" do
          @readable
        end
      }
-      U.fetch("fake", :max_attempts => 3, :quiet => true)
-      @sleep_count.should == 2
+      result = U.fetch("fake", :max_attempts => 3, :quiet => true)
+      result.should == "result" && @sleep_count.should == 2
    end
 
    it "bad fetches above retry limit give nil" do
@@ -89,10 +88,9 @@ describe "Utility" do
          @readable
        end
      }
-      U.fetch("fake", :max_attempts => 2, :quiet => true)
-      @sleep_count.should == 1
+      result = U.fetch("fake", :max_attempts => 2, :quiet => true)
+      result.should == nil && @sleep_count.should == 1
    end
-
  end
 
  describe "standardize_name" do
@@ -100,11 +98,65 @@ describe "Utility" do
      U.standardize_name("City Administrator, Office of").should ==
        "Office of City Administrator"
    end
-
+
    it "two commas" do
      U.standardize_name("Children, Youth & Families, Department of").should ==
        "Department of Children, Youth & Families"
    end
  end
-
+
+  # == XML
+
+  describe "parse_xml_from_file" do
+    it "should work" do
+      file = File.dirname(__FILE__) + '/test.xml'
+      parsed = U.parse_xml_from_file(file)
+      result = parsed.xpath('.//stations/station').map(&:content)
+      result.should == ["Metro Center", "Dupont Circle"]
+    end
+  end
+
+  describe "parse_xml_from_uri" do
+    before do
+      @readable = Object.new
+      @readable.stub(:read).and_return(%(
+        <stations>
+          <station>Metro Center</station>
+          <station>Dupont Circle</station>
+        </stations>
+      ))
+    end
+
+    it "should work" do
+      U.stub(:open).and_return(@readable)
+      parsed = U.parse_xml_from_uri("fake", :quiet => true)
+      result = parsed.xpath('.//stations/station').map(&:content)
+      result.should == ["Metro Center", "Dupont Circle"]
+    end
+  end
+
+  # == JSON
+
+  describe "parse_json_from_file" do
+    it "should work" do
+      file = File.dirname(__FILE__) + '/test.json'
+      U.parse_json_from_file(file).should ==
+        { "stations" => ["Metro Center","Dupont Circle"] }
+    end
+  end
+
+  describe "parse_json_from_uri" do
+    before do
+      @readable = Object.new
+      @readable.stub(:read).and_return(
+        %({"stations":["Metro Center","Dupont Circle"]}))
+    end
+
+    it "should work" do
+      U.stub(:open).and_return(@readable)
+      result = U.parse_json_from_uri("fake", :quiet => true)
+      result.should == { "stations" => ["Metro Center","Dupont Circle"] }
+    end
+  end
+
 end
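The reworked fetch specs now assert on the return value as well as the sleep count, which pins down the retry contract: open the URI, return the body on success, sleep and retry while attempts remain, and return nil once :max_attempts failures have occurred. A behaviourally equivalent sketch of that contract follows; the gem's actual fetch is unchanged by this release and not shown in the diff, so the default attempt count, rescue class, delay, logging, and use of the headers hash are assumptions:

    # Sketch only: consistent with the specs above, not the package's real implementation.
    def self.fetch(uri, options = {})
      max_attempts = options[:max_attempts] || 3   # assumed default
      attempts = 0
      begin
        open(uri, headers).read                    # headers is the UserAgent hash defined in utility.rb
      rescue StandardError => e
        attempts += 1
        puts "fetch failed (#{e.class}); retrying #{uri}" unless options[:quiet]
        if attempts < max_attempts
          sleep(5)                                 # the specs stub sleep and count these calls
          retry
        end
        nil                                        # give up once the attempt limit is reached
      end
    end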
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: datacatalog-importer
 version: !ruby/object:Gem::Version
-  hash:
+  hash: 19
   prerelease: false
   segments:
   - 0
   - 2
-  - 1
-  version: 0.2.1
+  - 2
+  version: 0.2.2
 platform: ruby
 authors:
 - David James
@@ -58,12 +58,12 @@ dependencies:
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-        hash:
+        hash: 27
        segments:
        - 1
-        -
-        -
-        version: 1.
+        - 3
+        - 0
+        version: 1.3.0
  type: :development
  version_requirements: *id003
 description: This framework makes it easier to write importers for the National Data Catalog.
@@ -99,6 +99,8 @@ files:
 - natdat_is_hungry.md
 - spec/spec.opts
 - spec/spec_helper.rb
+- spec/test.json
+- spec/test.xml
 - spec/utility_spec.rb
 has_rdoc: true
 homepage: http://github.com/sunlightlabs/datacatalog-importer