RubyGems - url_parser - Versions diffs - 0.1.0 - Mend

url_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: d88f4309a1787a5ed3f004e60a85e5f4a5e26765
+  data.tar.gz: 78146dbfb19dbec7f5f9169fef026a7b2448c43d
+SHA512:
+  metadata.gz: 99801e22611a0d7c78b576e01aacb097c32852fa99775674524b2bf1f000988e64cb00766eaeec083ce16b0cb7c10c9b29745dff7186fca43135276e467c8a05
+  data.tar.gz: 4df106696e71b773da7bc7e7997ac8ade0cb4002327cbb107c1b436c420373b1ce55abcd0eed67d0b328385c7d35d190bc921e5e64d264ef26c4ce7232817389

data/.gitignore ADDED Viewed

@@ -0,0 +1,22 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+*.bundle
+*.so
+*.o
+*.a
+mkmf.log

data/.rspec ADDED Viewed

@@ -0,0 +1,2 @@
+--color
+--format progress

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in url_parser.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2014 Matt Solt
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,33 @@
+# UrlParser
+Combine PostRank-URI, Domainatrix, and other Ruby url parsing libraries into a common interface.
+See also:
+- https://github.com/pauldix/domainatrix
+- https://github.com/postrank-labs/postrank-uri
+## Installation
+Add this line to your application's Gemfile:
+    gem 'url_parser'
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install url_parser
+## Usage
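+Parse a single URL (it is cleaned and normalized unless `preserve: true` is passed):
+    parser = UrlParser.new('link.to?a=b&utm_source=FeedBurner#stuff')
+    parser.url          # => "http://link.to/?a=b"
+    parser.original_url # => "link.to?a=b&utm_source=FeedBurner#stuff"
+Inspect the domain, subdomain and validity:
+    UrlParser.new('https://github.com/pauldix/domainatrix').domain # => "github.com"
+    UrlParser.new('http://www.energy.ca.gov/').subdomain           # => "energy.ca.gov"
+    UrlParser.new('http://example.com/').valid?                    # => true
+Extract and parse every URL found in a block of text:
+    UrlParser.call('there is a http://example.com/ in here').map(&:url) # => ["http://example.com/"]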
+## Contributing
+1. Fork it ( https://github.com/activefx/url_parser/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,2 @@
+require "bundler/gem_tasks"
+

data/lib/url_parser/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module UrlParser
+  # Gem version string; url_parser.gemspec reads it as UrlParser::VERSION.
+  VERSION = "0.1.0"
+end

data/lib/url_parser.rb ADDED Viewed

@@ -0,0 +1,150 @@
+require "url_parser/version"
+require "domainatrix"
+require "postrank-uri"
+require "addressable/uri"
+class Array
+  # Coerces +object+ into an Array:
+  # - nil becomes []
+  # - anything responding to to_ary is converted with it, falling back to
+  #   [object] when to_ary returns nil
+  # - everything else becomes [object]
+  #
+  # The trailing `unless respond_to?(:wrap)` skips the definition when Array
+  # already has a wrap class method, so an existing implementation is never
+  # overridden (presumably ActiveSupport's, which this mirrors - TODO confirm).
+  def self.wrap(object)
+    if object.nil?
+      []
+    elsif object.respond_to?(:to_ary)
+      object.to_ary || [object]
+    else
+      [object]
+    end
+  end unless respond_to?(:wrap)
+end
+module UrlParser
+  module Error; end
+  def self.call(text)
+    urls = []
+    PostRank::URI.extract(text).each do |url|
+      urls << new(url)
+    end
+    urls
+  end
+  def self.new(url, options = {})
+    Base.new(url, options)
+  end
+  class Base
+    # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
+    MAJOR_SCHEMES = [
+      'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'magnet',
+      'mailto', 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu',
+      'sftp', 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet',
+      'wais',
+      # Unofficial schemes
+      'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs',
+      'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', 'mvn'
+    ]
+    DEFAULT_SCHEMES = [
+      'http', 'https', 'ftp', 'mailto', 'file', 'ssh', 'feed',
+      'cvs', 'git', 'mvn', 'nntp', 'shttp', 'svn'
+    ]
+    attr_reader :url, :original_url
+    def initialize(url, options = {})
+      tag_errors do
+        @schemes = options.fetch(:schemes) { DEFAULT_SCHEMES }
+        @preserve = !!options[:preserve]
+        @original_url = url
+        @url = @preserve ? url : PostRank::URI.clean(url)
+      end
+    end
+    def schemes
+      Array.wrap(@schemes)
+    end
+    def uri
+      tag_errors do
+        @uri ||= Addressable::URI.parse(url) rescue nil
+      end
+    end
+    def scheme
+      uri.scheme if uri
+    end
+    def user
+      uri.user if uri
+    end
+    def password
+      uri.password if uri
+    end
+    def host
+      uri.host if uri
+    end
+    def port
+      uri.port if uri
+    end
+    def path
+      uri.path if uri
+    end
+    def query
+      uri.query if uri
+    end
+    def fragment
+      uri.fragment if uri
+    end
+    def query_values
+      uri ? uri.query_values.to_h : {}
+    end
+    def valid?
+      return true if domain == 'localhost'
+      return false if uri.nil?
+      return false unless schemes.include?(scheme)
+      return false unless host =~ /\./
+      true
+    end
+    def parser
+      tag_errors do
+        @parser ||= Domainatrix.parse(url)
+      end
+    end
+    def domain
+      parser.domain_with_public_suffix
+    end
+    def subdomain
+      unless parser.subdomain.empty?
+        parts = parser.subdomain.tap{ |s| s.slice!(domain) }.split('.')
+        parts.shift if parts.first =~ /www?\d*/
+        (parts << domain).join('.')
+      else
+        domain
+      end
+    end
+    private
+    def tag_errors
+      yield
+    rescue Exception => error
+      error.extend(UrlParser::Error)
+      raise
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require "rspec"
+require "url_parser"
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# Require this file using `require "spec_helper"` to ensure that it is only
+# loaded once.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+  # If the :focus filter below matches no example, run the whole suite
+  # instead of running nothing.
+  config.run_all_when_everything_filtered = true
+  # Only run examples tagged with :focus when any are tagged.
+  config.filter_run :focus
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = 'random'
+end

data/spec/url_parser_spec.rb ADDED Viewed

@@ -0,0 +1,176 @@
+require 'spec_helper'
+# Specs for the public UrlParser interface. Most contexts define `link`;
+# `parser` is then built lazily from it.
+describe UrlParser do
+  let(:parser) { UrlParser.new(link) }
+  # Smoke test: the gem loads and exposes a version.
+  it "must be defined" do
+    expect(UrlParser::VERSION).not_to be_nil
+  end
+  # UrlParser.call extracts URLs from free text.
+  context "::call" do
+    let(:link) { 'http://example.com/' }
+    let(:text) { "there is a #{link} in here" }
+    let(:extractor) { UrlParser.call(text) }
+    it "extracts urls from text into an array" do
+      expect(extractor.collect(&:url)).to include link
+    end
+    it "initializes each url with the parser" do
+      expect(extractor.first).to be_a UrlParser::Base
+    end
+  end
+  # UrlParser.new builds a parser for a single URL.
+  context "::new" do
+    let(:link) { 'http://example.com/' }
+    it "initializes a parser with a url" do
+      expect(parser.url).to eq link
+    end
+    # 'http:||bra.ziz' is the malformed input used to provoke an error.
+    it "cannot initialize invalid urls" do
+      expect{ UrlParser.new('http:||bra.ziz') }.to raise_error
+    end
+    it "adds http by default" do
+      expect(UrlParser.new('example.com').url).to eq link
+    end
+    it "adds http to protocol-less urls" do
+      expect(UrlParser.new('//example.com').url).to eq link
+    end
+    it "any errors raised inherit from UrlParser::Error" do
+      expect{
+        UrlParser.new('http:||bra.ziz')
+      }.to raise_error UrlParser::Error
+    end
+    context "options" do
+      # :preserve controls whether the URL is cleaned on initialization.
+      context ":preserve" do
+        let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+        it "is false by default" do
+          expect(parser.url).not_to eq parser.original_url
+        end
+        it "does not clean the url when true" do
+          parser = UrlParser.new(link, preserve: true)
+          expect(parser.url).to eq parser.original_url
+        end
+      end
+    end
+  end
+  context "#uri" do
+    it "returns a parsed uri" do
+      expect(UrlParser.new('http://example.com').uri).to be_a Addressable::URI
+    end
+  end
+  # telnet is not among the default schemes, so it is only valid when passed
+  # through the :schemes option.
+  context "#valid?" do
+    it "returns false if the url is invalid" do
+      expect(UrlParser.new('bullshit')).not_to be_valid
+    end
+    it "returns false if the url scheme is not in the options" do
+      expect(UrlParser.new('telnet://some.com')).not_to be_valid
+    end
+    it "returns true if the url scheme is in the options" do
+      expect(UrlParser.new('telnet://some.com', schemes: ['telnet'])).to be_valid
+    end
+    it "returns true if the url is valid" do
+      expect(UrlParser.new('http://example.com/')).to be_valid
+    end
+    it "returns true for localhost" do
+      expect(UrlParser.new('localhost:5000')).to be_valid
+    end
+  end
+  context "#original_url" do
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+    it "preserves the url input" do
+      expect(parser.original_url).to eq link
+    end
+  end
+  # The cleaned URL gains a scheme and trailing slash and loses the
+  # utm_source parameter and the fragment.
+  context "#url" do
+    let(:link) { 'link.to?a=b&utm_source=FeedBurner#stuff' }
+    it "returns a url" do
+      expect(parser.url).to eq 'http://link.to/?a=b'
+    end
+    it "attempts to clean and normalize urls" do
+      [
+        'http://igvita.com/',
+        'http://igvita.com///',
+        'http://igvita.com/../?#',
+        'http://igvita.com/a/../?',
+        'http://igvita.com/a/../?utm_source%3Danalytics'
+      ].each do |url|
+        expect(UrlParser.new(url).url)
+          .to eq 'http://igvita.com/'
+      end
+    end
+  end
+  context "#domain" do
+    let(:link) { 'https://github.com/pauldix/domainatrix' }
+    it "returns the domain name with suffix" do
+      expect(parser.domain).to eq 'github.com'
+    end
+  end
+  # #subdomain returns the full host name minus any leading www-style label.
+  context "#subdomain" do
+    let(:link) { 'http://foo.bar.pauldix.co.uk/asdf.html?q=arg' }
+    it "returns all subdomains with suffix" do
+      expect(parser.subdomain).to eq 'foo.bar.pauldix.co.uk'
+    end
+    it "returns only the domain if there is no subdomain" do
+      url = UrlParser.new('https://github.com/')
+      expect(url.subdomain).to eq 'github.com'
+    end
+    it "does not include www as part of the subdomain" do
+      parser = UrlParser.new("http://www.energy.ca.gov/")
+      expect(parser.subdomain).to eq 'energy.ca.gov'
+    end
+    it "does not include any variation of www as part of the subdomain" do
+      [ 'ww2', 'www2', 'ww23', 'www23' ].each do |www|
+        parser = UrlParser.new("http://#{www}.energy.ca.gov/")
+        expect(parser.subdomain).to eq 'energy.ca.gov'
+      end
+    end
+  end
+end

data/url_parser.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'url_parser/version'
+# Gem specification for url_parser; the version comes from
+# lib/url_parser/version.rb, required above.
+Gem::Specification.new do |spec|
+  spec.name          = "url_parser"
+  spec.version       = UrlParser::VERSION
+  spec.authors       = ["Matt Solt"]
+  spec.email         = ["mattsolt@gmail.com"]
+  spec.summary       = %q{Combine PostRank-URI, Domainatrix, and other Ruby url parsing libraries into a common interface.}
+  spec.description   = %q{Uses PostRank-URI to clean, Addressable to break into components, and Domainatrix to determine domain and subdomain.}
+  spec.homepage      = "https://github.com/activefx/url_parser"
+  spec.license       = "MIT"
+  # Package every file tracked by git; the listing is NUL-separated (-z) and
+  # split on "\x0".
+  spec.files         = `git ls-files -z`.split("\x0")
+  # Executables are the basenames of packaged files under bin/.
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  # Development-only dependencies.
+  spec.add_development_dependency "bundler", "~> 1.6"
+  spec.add_development_dependency "rake", "~> 10"
+  spec.add_development_dependency "rspec", "~> 3.0"
+  # Runtime dependencies: the three libraries required by lib/url_parser.rb.
+  spec.add_dependency "domainatrix", ">= 0.0.11"
+  spec.add_dependency "postrank-uri", "~> 1.0"
+  spec.add_dependency "addressable", "~> 2.3"
+end

metadata ADDED Viewed

@@ -0,0 +1,143 @@
+--- !ruby/object:Gem::Specification
+name: url_parser
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Matt Solt
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-08-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: domainatrix
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.0.11
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.0.11
+- !ruby/object:Gem::Dependency
+  name: postrank-uri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: addressable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.3'
+description: Uses PostRank-URI to clean, Addressable to break into components, and
+  Domainatrix to determine domain and subdomain.
+email:
+- mattsolt@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- ".rspec"
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- lib/url_parser.rb
+- lib/url_parser/version.rb
+- spec/spec_helper.rb
+- spec/url_parser_spec.rb
+- url_parser.gemspec
+homepage: https://github.com/activefx/url_parser
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: Combine PostRank-URI, Domainatrix, and other Ruby url parsing libraries into
+  a common interface.
+test_files:
+- spec/spec_helper.rb
+- spec/url_parser_spec.rb