RubyGems - datapackage - Versions diffs - 0.1.3 → 0.2.1 - Mend

datapackage 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

checksums.yaml +4 -4
data/README.md +140 -54
data/bin/console +10 -0
metadata +5 -52
data/datapackage/schemas/LICENSE.md +0 -24
data/datapackage/schemas/README.md +0 -33
data/datapackage/schemas/csv-dialect-description-format.json +0 -30
data/datapackage/schemas/data-package.json +0 -146
data/datapackage/schemas/definitions.json +0 -222
data/datapackage/schemas/fiscal-data-package.json +0 -279
data/datapackage/schemas/fiscal-data-package.jsonld +0 -5
data/datapackage/schemas/index.html +0 -15
data/datapackage/schemas/json-table-schema.json +0 -83
data/datapackage/schemas/registry.csv +0 -4
data/datapackage/schemas/tabular-data-package.json +0 -147
data/datapackage/schemas/tests/__init__.py +0 -0
data/datapackage/schemas/tests/test_registry.py +0 -102
data/datapackage/schemas/tests/test_schemas.py +0 -41
data/lib/datapackage/exceptions.rb +0 -12
data/lib/datapackage/package.rb +0 -181
data/lib/datapackage/registry.rb +0 -81
data/lib/datapackage/resource.rb +0 -83
data/lib/datapackage/schema.rb +0 -111
data/lib/datapackage/version.rb +0 -3
data/lib/datapackage.rb +0 -17

data/datapackage/schemas/tabular-data-package.json DELETED

@@ -1,147 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-04/schema#",
-  "title": "Tabular Data Package",
-  "description": "Tabular Data Package is a simple specification for data access and delivery of tabular data.",
-  "type": "object",
-  "required": [ "name", "resources" ],
-  "properties": {
-    "name": {
-      "$ref": "definitions.json#/define/name",
-      "propertyOrder": 10
-    },
-    "title": {
-      "$ref": "definitions.json#/define/title",
-      "propertyOrder": 20
-    },
-    "description": {
-      "$ref": "definitions.json#/define/description",
-      "format": "textarea",
-      "propertyOrder": 30
-    },
-    "homepage": {
-      "$ref": "definitions.json#/define/homepage",
-      "propertyOrder": 40
-    },
-    "version": {
-      "$ref": "definitions.json#/define/version",
-      "propertyOrder": 50
-    },
-    "license": {
-      "$ref": "definitions.json#/define/license",
-      "propertyOrder": 60
-    },
-    "author": {
-      "$ref": "definitions.json#/define/author",
-      "propertyOrder": 70
-    },
-    "contributors": {
-      "$ref": "definitions.json#/define/contributors",
-      "propertyOrder": 80,
-      "options": { "hidden": true }
-    },
-    "resources": {
-      "title": "Resources",
-      "description": "The data resources that this package describes.",
-      "type": "array",
-      "propertyOrder": 90,
-      "minItems": 0,
-      "items": {
-        "type": "object",
-        "properties": {
-          "name": {
-            "$ref": "definitions.json#/define/name",
-            "propertyOrder": 10
-          },
-          "title": {
-            "$ref": "definitions.json#/define/title",
-            "propertyOrder": 20
-          },
-          "description": {
-            "$ref": "definitions.json#/define/description",
-            "propertyOrder": 30,
-            "format": "textarea"
-          },
-          "schema": {
-            "$ref": "definitions.json#/define/schema",
-            "propertyOrder": 40
-          },
-          "url": {
-            "$ref": "definitions.json#/define/url",
-            "propertyOrder": 50
-          },
-          "path": {
-            "$ref": "definitions.json#/define/path",
-            "propertyOrder": 60
-          },
-          "data": {
-            "$ref": "definitions.json#/define/data",
-            "propertyOrder": 70
-          },
-          "format": {
-            "$ref": "definitions.json#/define/format",
-            "propertyOrder": 80
-          },
-          "mediatype": {
-            "$ref": "definitions.json#/define/mediatype",
-            "propertyOrder": 90
-          },
-          "encoding": {
-            "$ref": "definitions.json#/define/encoding",
-            "propertyOrder": 100
-          },
-          "bytes": {
-            "$ref": "definitions.json#/define/bytes",
-            "propertyOrder": 110,
-            "options": { "hidden": true }
-          },
-          "hash": {
-            "$ref": "definitions.json#/define/hash",
-            "propertyOrder": 120,
-            "options": { "hidden": true }
-          },
-          "dialect": {
-            "$ref": "definitions.json#/define/dialect",
-            "propertyOrder": 130,
-            "options": { "hidden": true }
-          },
-          "sources": {
-            "$ref": "definitions.json#/define/sources",
-            "propertyOrder": 140,
-            "options": { "hidden": true }
-          },
-          "license": {
-            "$ref": "definitions.json#/define/license",
-            "description": "The license under which the resource is published.",
-            "propertyOrder": 150,
-            "options": { "hidden": true }
-          }
-        },
-        "anyOf": [
-          { "title": "url required", "required": ["url"] },
-          { "title": "path required", "required": ["path"] },
-          { "title": "data required", "required": ["data"] }
-        ],
-        "required": [ "schema" ]
-      }
-    },
-    "keywords": {
-      "$ref": "definitions.json#/define/keywords",
-      "propertyOrder": 100
-    },
-    "sources": {
-      "$ref": "definitions.json#/define/sources",
-      "propertyOrder": 110,
-      "options": { "hidden": true }
-    },
-    "image": {
-      "$ref": "definitions.json#/define/image",
-      "propertyOrder": 120,
-      "options": { "hidden": true }
-    },
-    "dataDependencies": {
-      "$ref": "definitions.json#/define/dataDependencies",
-      "propertyOrder": 140,
-      "options": { "hidden": true }
-    }
-  }
-}

data/datapackage/schemas/tests/__init__.py DELETED

File without changes

data/datapackage/schemas/tests/test_registry.py DELETED

@@ -1,102 +0,0 @@
-import os
-import csv
-import urllib
-import unittest
-BASE_PATH = os.path.abspath(
-    os.path.join(
-        os.path.dirname(__file__),
-        '..'
-    )
-)
-REGISTRY_PATH = os.path.join(BASE_PATH, 'registry.csv')
-class TestRegistry(unittest.TestCase):
-    def test_registry_has_the_expected_headers(self):
-        expected_headers = (
-            'id',
-            'title',
-            'schema',
-            'schema_path',
-            'specification',
-        )
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            headers = next(csv.reader(f))
-        self.assertEqual(sorted(headers), sorted(expected_headers))
-    def test_registry_schemas_have_the_required_attributes(self):
-        required_attributes = (
-            'id',
-            'title',
-            'schema',
-            'schema_path',
-            'specification',
-        )
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            registry = csv.DictReader(f)
-            msg = "Schema '{0}' doesn't define required attribute '{1}'"
-            for schema in registry:
-                for key, value in schema.items():
-                    if key in required_attributes:
-                        assert value != '', msg.format(schema['id'], key)
-    def test_registry_schemas_have_unique_ids(self):
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            registry = csv.DictReader(f)
-            ids = [schema['id'] for schema in registry]
-            assert len(ids) == len(set(ids)), "The schemas IDs aren't unique"
-    def test_schema_paths_exist_and_are_files(self):
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            registry = csv.DictReader(f)
-            for entry in registry:
-                schema_path = entry['schema_path']
-                msg = "schema_path '{0}' of schema '{1}' isn't a file"
-                msg = msg.format(schema_path, entry['id'])
-                path = os.path.join(BASE_PATH, schema_path)
-                assert os.path.isfile(path), msg
-    def test_schema_urls_exist(self):
-        is_successful = lambda req: req.status >= 200 and req.status < 400
-        is_redirect = lambda req: req.status >= 300 and req.status < 400
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            registry = csv.DictReader(f)
-            for entry in registry:
-                try:
-                    url = entry['schema']
-                    res = self._make_head_request(url)
-                    msg = "Error fetching schema_url '{0}' of schema '{1}'"
-                    msg = msg.format(url, entry['id'])
-                    assert (is_successful(res) or is_redirect(res)), msg
-                except urllib.error.URLError as e:
-                    raise Exception(msg) from e
-    def test_specification_urls_exist(self):
-        is_successful = lambda req: req.status >= 200 and req.status < 400
-        is_redirect = lambda req: req.status >= 300 and req.status < 400
-        with open(REGISTRY_PATH, 'r', newline='') as f:
-            registry = csv.DictReader(f)
-            for entry in registry:
-                try:
-                    url = entry['schema']
-                    res = self._make_head_request(url)
-                    msg = "Error fetching specification '{0}' of schema '{1}'"
-                    msg = msg.format(url, entry['id'])
-                    assert (is_successful(res) or is_redirect(res)), msg
-                except urllib.error.URLError as e:
-                    raise Exception(msg) from e
-    def _make_head_request(self, url):
-        req = urllib.request.Request(url, method='HEAD')
-        return urllib.request.urlopen(req)

data/datapackage/schemas/tests/test_schemas.py DELETED

@@ -1,41 +0,0 @@
-import os
-import glob
-import json
-import unittest
-import jsonschema
-BASE_PATH = os.path.abspath(
-    os.path.join(
-        os.path.dirname(__file__),
-        '..'
-    )
-)
-class TestSchemas(unittest.TestCase):
-    def test_json_files_must_be_valid(self):
-        json_glob = os.path.join(BASE_PATH, '*.json')
-        json_paths = glob.glob(json_glob)
-        for json_path in json_paths:
-            try:
-                with open(json_path, 'r') as f:
-                    json.load(f)
-            except ValueError as e:
-                msg = "File '{0}' isn\'t a valid JSON."
-                raise ValueError(msg.format(json_path)) from e
-    def test_json_files_must_be_valid_json_schemas(self):
-        json_glob = os.path.join(BASE_PATH, '*.json')
-        json_paths = glob.glob(json_glob)
-        for json_path in json_paths:
-            with open(json_path, 'r') as f:
-                schema = json.load(f)
-            try:
-                validator_class = jsonschema.validators.validator_for(schema)
-                validator = validator_class(schema)
-                validator.check_schema(schema)
-            except jsonschema.exceptions.SchemaError as e:
-                msg = "File '{0}' isn\'t a valid JSON Schema."
-                raise ValueError(msg.format(json_path)) from e

data/lib/datapackage/exceptions.rb DELETED

@@ -1,12 +0,0 @@
-module DataPackage
-  class RegistryError < StandardError; end
-  class SchemaException < Exception
-    attr_reader :status, :message
-    def initialize status
-      @status = status
-      @message = status
-    end
-  end
-end

data/lib/datapackage/package.rb DELETED

@@ -1,181 +0,0 @@
-require 'open-uri'
-module DataPackage
-  class Package < Hash
-    attr_reader :opts, :errors
-    attr_writer :resources
-    # Parse or create a data package
-    #
-    # Supports reading data from JSON file, directory, and a URL
-    #
-    # package:: Hash or a String
-    # schema:: Hash, Symbol or String
-    # opts:: Options used to customize reading and parsing
-    def initialize(package = nil, schema = :base, opts = {})
-      @opts = opts
-      @schema = DataPackage::Schema.new(schema || :base)
-      @dead_resources = []
-      self.merge! parse_package(package)
-      define_properties!
-      load_resources!
-    end
-    def parse_package(package)
-      # TODO: base directory/url
-      if package.nil?
-        {}
-      elsif package.class == Hash
-        package
-      else
-        read_package(package)
-      end
-    end
-    # Returns the directory for a local file package or base url for a remote
-    # Returns nil for an in-memory object (because it has no base as yet)
-    def base
-      # user can override base
-      return @opts[:base] if @opts[:base]
-      return '' unless @location
-      # work out base directory or uri
-      if local?
-          return File.dirname(@location)
-      else
-          return @location.split('/')[0..-2].join('/')
-      end
-    end
-    # Is this a local package? Returns true if created from an in-memory object or a file/directory reference
-    def local?
-      return @local if @local
-      return !@location.start_with?('http') if @location
-      true
-    end
-    def resources
-      update_resources!
-      @resources
-    end
-    def property(property, default = nil)
-      self[property] || default
-    end
-    def valid?
-      validate
-      @valid
-    end
-    def validate
-      @errors = @schema.validation_errors(self)
-      @valid = @schema.valid?(self)
-    end
-    def resource_exists?(location)
-      @dead_resources.include?(location)
-    end
-    def to_json
-      self.to_json
-    end
-    private
-    def define_properties!
-      (@schema['properties'] || {}).each do |k, v|
-        next if k == 'resources'
-        define_singleton_method("#{k.to_sym}=", proc { |p| set_property(k, p) })
-        define_singleton_method(k.to_sym.to_s, proc { property k, default_value(v) })
-      end
-    end
-    def load_resources!
-      @resources = (self['resources'] || [])
-      update_resources!
-    end
-    def update_resources!
-      @resources.map! do |resource|
-        begin
-          load_resource(resource)
-        rescue
-          @dead_resources << resource['path']
-          nil
-        end
-      end
-    end
-    def load_resource(resource)
-      if resource.is_a?(Resource)
-        resource
-      else
-        Resource.load(resource, base)
-      end
-    end
-    def default_value(schema_data)
-      case schema_data['type']
-      when 'string'
-          nil
-      when 'array'
-          []
-      when 'object'
-          {}
-      end
-    end
-    def set_property(key, value)
-      self[key] = value
-    end
-    def read_package(package)
-      if is_directory?(package)
-          package = File.join(package, opts[:default_filename] || 'datapackage.json')
-      elsif is_containing_url?(package)
-          package = URI.join(package, 'datapackage.json')
-      end
-      @location = package.to_s
-      if File.extname(package.to_s) == '.zip'
-          unzip_package(package)
-      else
-          JSON.parse open(package).read
-      end
-    end
-    def is_directory?(package)
-      !package.start_with?('http') && File.directory?(package)
-    end
-    def is_containing_url?(package)
-      package.start_with?('http') && !package.end_with?('datapackage.json', 'datapackage.zip')
-    end
-    def write_to_tempfile(url)
-      tempfile = Tempfile.new('datapackage')
-      tempfile.write(open(url).read)
-      tempfile.rewind
-      tempfile
-    end
-    def unzip_package(package)
-      package = write_to_tempfile(package) if package.start_with?('http')
-      dir = Dir.mktmpdir
-      Zip::File.open(package) do |zip_file|
-          # Extract all the files
-          zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") }
-          # Get and parse the datapackage metadata
-          entry = zip_file.glob("*/#{opts[:default_filename] || 'datapackage.json'}").first
-          package = JSON.parse(entry.get_input_stream.read)
-      end
-      # Set the base dir to the directory we unzipped to
-      @opts[:base] = dir
-      # This is now a local file, not a URL
-      @local = true
-      package
-    end
-  end
-end

data/lib/datapackage/registry.rb DELETED

@@ -1,81 +0,0 @@
-module DataPackage
-  ##
-  # Allow loading Data Package profiles from a registry.
-  class Registry
-    DEFAULT_REGISTRY_URL = 'http://schemas.datapackages.org/registry.csv'
-    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', '..', 'datapackage', 'schemas', 'registry.csv')
-    attr_reader :base_path
-    def initialize(registry_path_or_url = DEFAULT_REGISTRY_PATH)
-      registry_path_or_url ||= DEFAULT_REGISTRY_PATH
-      if File.file?(registry_path_or_url)
-        @base_path = File.dirname(
-          File.absolute_path(registry_path_or_url)
-        )
-      end
-      @profiles = {}
-      @registry = get_registry(registry_path_or_url)
-    end
-    def get(profile_id)
-      @profiles[profile_id] ||= get_profile(profile_id)
-    end
-    def available_profiles
-      @registry
-    end
-    private
-      def get_profile(profile_id)
-        profile_metadata = @registry[profile_id]
-        return if profile_metadata.nil?
-        path = get_absolute_path(profile_metadata[:schema_path])
-        if path && File.file?(path)
-          load_json(path)
-        else
-          url = profile_metadata[:schema]
-          load_json(url)
-        end
-      end
-      def get_registry(registry_path_or_url)
-        begin
-          csv = parse_csv(registry_path_or_url)
-          registry = {}
-          csv.each { |row| registry[row.fetch(:id)] = Hash[row.headers.zip(row.fields)] }
-        rescue KeyError, OpenURI::HTTPError, Errno::ENOENT
-          raise(RegistryError)
-        end
-        registry
-      end
-      def parse_csv(path_or_url)
-        csv = open(path_or_url).read
-        if csv.match(/,/)
-          CSV.new(csv, headers: :first_row, header_converters: :symbol)
-        else
-          raise RegistryError
-        end
-      end
-      def get_absolute_path(relative_path)
-        File.join(@base_path, relative_path)
-      rescue TypeError
-        nil
-      end
-      def load_json(path)
-        json = open(path).read
-        JSON.parse(json)
-      rescue JSON::ParserError, OpenURI::HTTPError
-        raise RegistryError
-      end
-  end
-end

data/lib/datapackage/resource.rb DELETED

@@ -1,83 +0,0 @@
-module DataPackage
-  class Resource < Hash
-    def initialize(resource, base_path = '')
-      self.merge! resource
-    end
-    def self.load(resource, base_path = '')
-      # This returns if there are no alternative ways to access the data OR there
-      # is a base_path which is a URL
-      if is_url?(resource, base_path)
-        RemoteResource.new(resource, base_path)
-      else
-        # If there's a data attribute, we definitely want an inline resource
-        if resource['data']
-          InlineResource.new(resource)
-        else
-          # If the file exists - we want a local resource
-          if file_exists?(resource, base_path)
-            LocalResource.new(resource, base_path)
-          # If it doesn't exist and there's a URL to grab the data from, we want
-          # a remote resource
-          elsif resource['url']
-            RemoteResource.new(resource, base_path)
-          end
-        end
-      end
-    end
-    def self.file_exists?(resource, base_path)
-      path = resource['path']
-      path = File.join(base_path, path) if base_path != ''
-      File.exists?(path)
-    end
-    def self.is_url?(resource, base_path)
-      return true if resource['url'] != nil && resource['path'] == nil && resource['data'] == nil
-      return true if base_path.start_with?('http')
-    end
-    def table
-      @table ||= JsonTableSchema::Table.new(CSV.parse(data), self['schema']) if self['schema']
-    end
-  end
-  class LocalResource < Resource
-    def initialize(resource, base_path = '')
-      @base_path = base_path
-      @path = resource['path']
-      super
-    end
-    def data
-      @path = File.join(@base_path, @path) if @base_path != ''
-      open(@path).read
-    end
-  end
-  class InlineResource < Resource
-    def data
-      self['data']
-    end
-  end
-  class RemoteResource < Resource
-    def initialize(resource, base_url = '')
-      @base_url = base_url
-      @url = resource['url']
-      @path = resource['path']
-      super
-    end
-    def data
-      url = @url ? @url : URI.join(@base_url, @path)
-      open(url).read
-    end
-  end
-end