RubyGems - data_kitten - Versions diffs - 1.3.0 → 1.3.1 - Mend

data_kitten 1.3.0 → 1.3.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +8 -8
data/README.md +1 -1
data/bin/data_kitten +1 -1
data/lib/data_kitten.rb +1 -2
data/lib/data_kitten/dataset.rb +25 -11
data/lib/data_kitten/distribution.rb +5 -16
data/lib/data_kitten/distribution_format.rb +26 -25
data/lib/data_kitten/fetcher.rb +10 -0
data/lib/data_kitten/hosts/bitbucket.rb +2 -2
data/lib/data_kitten/hosts/gist.rb +2 -2
data/lib/data_kitten/hosts/github.rb +2 -2
data/lib/data_kitten/publishing_formats/ckan.rb +21 -18
data/lib/data_kitten/version.rb +1 -1
metadata +1 -15

checksums.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    MTU4NmQ1MmU2YjJhY2U2NmVjOWE5NTBhOGM2YjNhNGQzMWIxYTU4MQ==
+    MzYzNmUyZmIxZGNlOGViNGQwMmQ5YThiZDQzYWZiNTc2OTBkZWYwYQ==
   data.tar.gz: !binary |-
-    ZjNhNWU4NzNlZjI4ODU3ZTRkYTgxZmY4MWM0NTA3OTQyNDNmMmJjMg==
+    NDVhMTRkZWRlMGI2YWEzNWZhNmRjZTRkNWJmNjAwOTRiNTVkMmQ0Yw==
 SHA512:
   metadata.gz: !binary |-
-    ZjY0ODMyZmZjODMxNmU4NmNhMmI4N2YxMWMzMThhNGFlMTAxZmQ0ODA1OTcz
-    MjcwOWIwMmYyMmRiNDlmNTEyN2ExMWE4N2E4NWJlMDFlNjI3NDU4ZmZjZWFj
-    Yjk2MWZiZGJmODE5ZDVjMWQ0OTY2ODI4MWRiMzVlZmE5NjM0OGM=
+    MDg5ZGZiNjRjYjM3ODgwNjRlYWU2NTlhM2EzMTgyNmRhOGM4MjQ1YWYzMGU3
+    YjU1NTI2ZmY4OGJkNWRiYTZhYzNmM2QzZWUwMjQzMDlhZGViYjlmYjM3ZmU1
+    MzA1ZGJkZGU5MWM0NjIyZWMzY2M5YThiNjA1ZTIwOTg4OGY2ZDY=
   data.tar.gz: !binary |-
-    MjY5NDZiZjliNThjZTk5NTQ4YzZlM2M4OGFiOGYzYjFjZDhmYzQ4NmRlMjJm
-    ZmE4MTMxZTMyZmQzNjBhODEyODZhODA3ZjIyMDUzNDFmMWJiMWRhNTBlMTU3
-    MTM2MTU4ZGY4YmZlMjcyM2VkYmM5Y2Q3NzY3YWJmNmNlODdlZTI=
+    OTVmZTRlODU0ZDJkMmRhMjU3YTViY2VlODlhNDBlYjVkYjdlM2RhNDE4N2Uw
+    MGQ1MTQ4NTY2MGY2YTIwZjY4YmE5NzQ1OWExODJjNWM3MDk0YmU3YjcxZGVm
+    Njg0ZjQ0NzFjN2I0NmM5OTM4OWM0MzhlNTY2OWIyMjYyZDZiM2U=

data/README.md CHANGED Viewed

@@ -45,7 +45,7 @@ Require if you need to:
 Request a dataset:
-	dataset = DataKitten::Dataset.new(access_url: "https://github.com/theodi/dataset-mod-disposals.git")
+	dataset = DataKitten::Dataset.new("https://github.com/theodi/dataset-mod-disposals.git")
 Use the results:

data/bin/data_kitten CHANGED Viewed

@@ -9,7 +9,7 @@ if ARGV.length == 0
   exit 1
 end
-dataset = DataKitten::Dataset.new(access_url: ARGV[0])
+dataset = DataKitten::Dataset.new(ARGV[0])
 if dataset.publishing_format == nil
   puts "Unable to determine format for dataset metadata"

data/lib/data_kitten.rb CHANGED Viewed

@@ -8,7 +8,6 @@ require 'rdf'
 require 'linkeddata'
 require 'nokogiri'
 require 'uri'
-require 'curb'
 require 'datapackage'
 require 'data_kitten/license'
@@ -41,4 +40,4 @@ require 'data_kitten/fetcher'
 #   dataset.publishing_format         # => :datapackage
 #   dataset.distributions             # => [Distribution<#1>, Distribution<#2>]
 #   dataset.distributions[0].headers  # => ['col1', 'col2']
-#   dataset.distributions[0].data[0]  # => {'col1' => 'value_1', 'col2' => 'value_2'}
+#   dataset.distributions[0].data[0]  # => {'col1' => 'value_1', 'col2' => 'value_2'}

data/lib/data_kitten/dataset.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module DataKitten
   # use the Datapackage metadata format.
   #
   # @example Load a Dataset from a git repository
-  #   dataset = Dataset.new(access_url: 'git://github.com/theodi/dataset-metadata-survey.git')
+  #   dataset = Dataset.new('git://github.com/theodi/dataset-metadata-survey.git')
   #   dataset.supported?         # => true
   #   dataset.origin             # => :git
   #   dataset.host               # => :github
@@ -30,13 +30,25 @@ module DataKitten
     attr_accessor :access_url
     # Create a new Dataset object
-    #
-    # @param [Hash] options the details of the Dataset.
-    # @option options [String] :access_url A URL that can be used to access the Dataset.
-    #                                      The class will attempt to auto-load metadata from this URL.
     #
-    def initialize(options)
-      @access_url = DataKitten::Fetcher.wrap(options[:access_url])
+    # The class will attempt to auto-load metadata from this URL.
+    #
+    # @overload new(url)
+    #   @param [String] url A URL that can be used to access the Dataset
+    #
+    # @overload new(options)
+    #   @param [Hash] options the details of the Dataset.
+    #   @option options [String] :access_url A URL that can be used to access the Dataset.
+    #
+    def initialize(url_or_options)
+      url = case url_or_options
+      when Hash
+        url_or_options[:access_url]
+      else
+        url_or_options
+      end
+      @access_url = DataKitten::Fetcher.wrap(url)
       detect_origin
       detect_host
       detect_publishing_format
@@ -51,9 +63,11 @@ module DataKitten
     end
     def source
-      @access_url.as_json if @access_url.ok?
+      @source ||= @access_url.as_json if @access_url.ok?
     end
+    attr_writer :source
     # Can metadata be loaded for this Dataset?
     #
     # @return [Boolean] true if metadata can be loaded, false if it's
@@ -84,9 +98,7 @@ module DataKitten
     #
     # @return [String] the identifier of the dataset
     #
-    def identifier
-      nil
-    end
+    attr_accessor :identifier
     # The human-readable title of the dataset.
     #
@@ -269,5 +281,7 @@ module DataKitten
       nil
     end
+    attr_accessor :metadata
   end
 end

data/lib/data_kitten/distribution.rb CHANGED Viewed

@@ -108,6 +108,8 @@ module DataKitten
       @dialect ||= {
         "delimiter" => ","
       }
+      @download = Fetcher.wrap(@download_url)
     end
     # A usable name for the distribution, unique within the {Dataset}.
@@ -136,9 +138,7 @@ module DataKitten
     #
     # @return [Boolean] whether the HTTP response returns a success code or not
     def exists?
-      if @download_url
-        http_head.response_code != 404
-      end
+      @download.exists?
     end
     # A CSV object representing the loaded data.
@@ -148,8 +148,8 @@ module DataKitten
       @data ||= begin
         if @path
           datafile = @dataset.send(:load_file, @path)
-        elsif @download_url
-          datafile = RestClient.get @download_url rescue nil
+        elsif @download.ok?
+          datafile = @download.body
         end
         if datafile
           case format.extension
@@ -170,17 +170,6 @@ module DataKitten
       end
     end
-    def http_head
-      if @download_url
-        @http_head ||= begin
-          Curl::Easy.http_head(@download_url) do |c|
-            c.follow_location = true
-            c.useragent = "curb"
-          end
-        end
-      end
-    end
   end
 end

data/lib/data_kitten/distribution_format.rb CHANGED Viewed

@@ -6,6 +6,30 @@ module DataKitten
   #
   class DistributionFormat
+    FORMATS = {
+      csv: { structured: true, open: true },
+      xls: { structured: true, open: false },
+      xlsx: { structured: true, open: true },
+      rdf: { structured: true, open: true },
+      xml: { structured: true, open: true },
+      wms: { structured: true, open: true },
+      ods: { structured: true, open: true },
+      rdfa: { structured: true, open: true },
+      kml: { structured: true, open: true },
+      rss: { structured: true, open: true },
+      json: { structured: true, open: true },
+      ical: { structured: true, open: true },
+      sparql: { structured: true, open: true },
+      kml: { structured: true, open: true },
+      georss: { structured: true, open: true },
+      geojson: { structured: true, open: true },
+      shp: { structured: true, open: true },
+      html: { structured: false, open: true },
+      doc: { structured: false, open: false },
+      pdf: { structured: false, open: true }
+    }
+    FORMATS.default = {}
     #@!attribute extension
     #@return [Symbol] a symbol for the file extension. For instance, :csv.
     attr_reader :extension
@@ -17,43 +41,20 @@ module DataKitten
       @distribution = distribution
       # Store extension as a lowercase symbol
       @extension = distribution.extension.to_s.downcase.to_sym
-      # Set up format lists
-      @@formats ||= {
-        csv:     { structured:  true, open:  true },
-        xls:     { structured:  true, open: false },
-        xlsx:    { structured:  true, open:  true },
-        rdf:     { structured:  true, open:  true },
-        xml:     { structured:  true, open:  true },
-        wms:     { structured:  true, open:  true },
-        ods:     { structured:  true, open:  true },
-        rdfa:    { structured:  true, open:  true },
-        kml:     { structured:  true, open:  true },
-        rss:     { structured:  true, open:  true },
-        json:    { structured:  true, open:  true },
-        ical:    { structured:  true, open:  true },
-        sparql:  { structured:  true, open:  true },
-        kml:     { structured:  true, open:  true },
-        georss:  { structured:  true, open:  true },
-        geojson: { structured:  true, open:  true },
-        shp:     { structured:  true, open:  true },
-        html:    { structured: false, open:  true },
-        doc:     { structured: false, open:  false },
-        pdf:     { structured: false, open:  true },
-      }
     end
     # Is this a structured format?
     #
     # @return [Boolean] whether the format is machine-readable or not.
     def structured?
-      @@formats[@extension][:structured] rescue nil
+      FORMATS[extension][:structured]
     end
     # Is this an open format?
     #
     # @return [Boolean] whether the format is open or not
     def open?
-      @@formats[@extension][:open] rescue nil
+      FORMATS[extension][:open]
     end
     # Whether the format of the file matches the extension given by the data

data/lib/data_kitten/fetcher.rb CHANGED Viewed

@@ -16,6 +16,16 @@ module DataKitten
       @url = url
     end
+    def exists?
+      if @requested
+        ok?
+      else
+        RestClient.head(url).code == 200
+      end
+    rescue RestClient::ExceptionWithResponse => error
+      false
+    end
     def ok?
       code == 200
     end

data/lib/data_kitten/hosts/bitbucket.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module DataKitten
       # @return [String] The supplied path with the Bitbucket base URL prepended
       #
       # @example
-      #   dataset = Dataset.new(access_url: 'https://bitbucket.org/floppy/hot-drinks.git')
+      #   dataset = Dataset.new('https://bitbucket.org/floppy/hot-drinks.git')
       #   dataset.bitbucket_path           # => 'https://bitbucket.org/floppy/hot-drinks/'
       #   dataset.bitbucket_path('pull-requests') # => 'https://bitbucket.org/floppy/hot-drinks/pull-requests'
       def bitbucket_path(path = '')
@@ -51,4 +51,4 @@ module DataKitten
   end
-end
+end

data/lib/data_kitten/hosts/gist.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module DataKitten
       # @return [String] The supplied path with the Gist base URL prepended
       #
       # @example
-      #   dataset = Dataset.new(access_url: 'git://gist.github.com/5633865.git')
+      #   dataset = Dataset.new('git://gist.github.com/5633865.git')
       #   dataset.gist_path           # => 'https://gist.github.com/5633865'
       #   dataset.gist_path('download') # => 'https://gist.github.com/5633865/download'
       def gist_path(path = '')
@@ -47,4 +47,4 @@ module DataKitten
   end
-end
+end

data/lib/data_kitten/hosts/github.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module DataKitten
       # @return [String] The supplied path with the GitHub base URL prepended
       #
       # @example
-      #   dataset = Dataset.new(access_url: 'git://github.com/theodi/dataset-metadata-survey.git')
+      #   dataset = Dataset.new('git://github.com/theodi/dataset-metadata-survey.git')
       #   dataset.github_path           # => 'https://github.com/theodi/dataset-metadata-survey/'
       #   dataset.github_path('issues') # => 'https://github.com/theodi/dataset-metadata-survey/issues'
       def github_path(path = '')
@@ -51,4 +51,4 @@ module DataKitten
   end
-end
+end

data/lib/data_kitten/publishing_formats/ckan.rb CHANGED Viewed

@@ -6,29 +6,36 @@ module DataKitten
     module CKAN
-      @@metadata = nil
       private
       def self.supported?(instance)
         uri = instance.uri
-        package = uri.path.split("/").last
+        base_uri = uri.merge("/")
+        *base, package = uri.path.split('/')
+        # If the 2nd to last element in the path is 'dataset' then it's probably
+        # the CKAN dataset view page, the last element will be the dataset id
+        # or name
+        if base.last == "dataset"
+          instance.identifier = package
+          # build a base URI ending with a /
+          base_uri = uri.merge(base[0...-1].join('/') + '/')
         # If the package is a UUID - it's more than likely to be a CKAN ID
-        if package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
-          @@id = package
+        elsif package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/)
+          instance.identifier = package
         else
-          results = RestClient.get "#{uri.scheme}://#{uri.host}/api/3/action/package_show", {:params => {:id => package}} rescue ""
-          if results == ""
-            results = RestClient.get "#{uri.scheme}://#{uri.host}/api/2/rest/dataset/#{package}"
+          results = begin
+            RestClient.get base_uri.merge("api/3/action/package_show").to_s, {:params => {:id => package}}
+          rescue RestClient::Exception
+            RestClient.get base_uri.merge("api/2/rest/dataset/#{package}").to_s
           end
           result = JSON.parse results
-          @@id = result["result"]["id"] rescue result["id"]
+          instance.identifier = result.fetch("result", result)["id"]
         end
-        @@metadata = JSON.parse RestClient.get "#{uri.scheme}://#{uri.host}/api/rest/package/#{@@id}"
-        @@metadata.extend(GuessableLookup)
+        instance.metadata = JSON.parse RestClient.get base_uri.merge("api/rest/package/#{instance.identifier}").to_s
+        instance.metadata.extend(GuessableLookup)
+        instance.source = instance.metadata
+        return true
       rescue
         false
       end
@@ -62,7 +69,7 @@ module DataKitten
       #
       # @see Dataset#identifier
       def identifier
-        metadata.lookup("name") || @@id
+        metadata.lookup("name") || @identifier
       end
       # A web page which can be used to gain access to the dataset
@@ -210,10 +217,6 @@ module DataKitten
       private
-      def metadata
-        @@metadata
-      end
       def select_extras(group, key)
         extra = group["extras"][key] rescue ""
         if extra == ""

data/lib/data_kitten/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module DataKitten
-  VERSION = "1.3.0"
+  VERSION = "1.3.1"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: data_kitten
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.3.1
 platform: ruby
 authors:
 - James Smith
@@ -95,20 +95,6 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: curb
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ! '>='
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ! '>='
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: datapackage
   requirement: !ruby/object:Gem::Requirement