skyscraper 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/skyscraper.rb +2 -2
- data/lib/skyscraper/node.rb +97 -4
- data/lib/skyscraper/resource.rb +55 -0
- data/lib/skyscraper/version.rb +1 -1
- data/skyscraper.gemspec +2 -2
- data/spec/skyscraper/skyscraper/node_spec.rb +85 -0
- data/spec/skyscraper/skyscraper/{node/resource_spec.rb → resource_spec.rb} +17 -17
- data/spec/skyscraper/skyscraper/results_spec.rb +1 -1
- data/spec/skyscraper/skyscraper_spec.rb +1 -1
- data/spec/test_files/{skyscraper-node-base-a.html → skyscraper-node-a.html} +1 -1
- data/spec/test_files/{skyscraper-node-base-b.html → skyscraper-node-b.html} +0 -0
- data/spec/test_files/{skyscraper-node-base-traversing.html → skyscraper-node-traversing.html} +0 -0
- data/spec/test_files/{skyscraper-node-base.html → skyscraper-node.html} +1 -1
- data/spec/test_files/{skyscraper-node-resource-b.html → skyscraper-resource-b.html} +0 -0
- data/spec/test_files/{skyscraper-node-resource-image.png → skyscraper-resource-image.png} +0 -0
- data/spec/test_files/{skyscraper-node-resource.html → skyscraper-resource.html} +2 -2
- metadata +34 -36
- data/lib/skyscraper/node/base.rb +0 -103
- data/lib/skyscraper/node/resource.rb +0 -57
- data/spec/skyscraper/skyscraper/node/base_spec.rb +0 -87
    
        data/lib/skyscraper.rb
    CHANGED
    
    | @@ -16,6 +16,7 @@ module Skyscraper | |
| 16 16 | 
             
              autoload :Node
         | 
| 17 17 | 
             
              autoload :Pages
         | 
| 18 18 | 
             
              autoload :Path
         | 
| 19 | 
            +
              autoload :Resource
         | 
| 19 20 | 
             
              autoload :Results
         | 
| 20 21 |  | 
| 21 22 | 
             
              mattr_accessor :defaults
         | 
| @@ -24,7 +25,6 @@ module Skyscraper | |
| 24 25 | 
             
                limit: nil,
         | 
| 25 26 | 
             
                encoding: "utf-8",
         | 
| 26 27 | 
             
                download_path: "/tmp/skyscraper/:sequence/:file_name", 
         | 
| 27 | 
            -
            #    reattempt_times: 1,
         | 
| 28 28 | 
             
                noise_errors: true,
         | 
| 29 29 | 
             
                skip_on_error: true
         | 
| 30 30 | 
             
              }
         | 
| @@ -35,7 +35,7 @@ module Skyscraper | |
| 35 35 |  | 
| 36 36 | 
             
              # Loads the document found at +path+ (decoded with +encoding+, which
              # defaults to the configured encoding) and wraps the result of its
              # "html" CSS query in a Node.
              def self.fetch(path, encoding = Skyscraper.config.encoding)
                html_root = Skyscraper::Document.load(path, encoding).css("html")
                Node.new(html_root)
              end
         | 
| 40 40 |  | 
| 41 41 | 
             
              def fetch
         | 
    
        data/lib/skyscraper/node.rb
    CHANGED
    
    | @@ -1,8 +1,101 @@ | |
| 1 1 | 
             
            module Skyscraper
              # Thin wrapper around a parsed HTML element that offers jQuery-like
              # traversal (find, children, parent, parents, siblings) and exposes the
              # element's HTML attributes as reader methods (node.href, node.src, ...).
              class Node
                # `class` is redefined below to return the HTML class attribute, so the
                # real Ruby class is kept reachable under this name.
                alias :original_class :class

                attr_accessor :element

                # element - the wrapped object; it must answer css, children, parent,
                #           attribute, content, name and document as used below.
                def initialize(element)
                  @element = element
                end

                # Returns the first descendant matching the CSS selector, or nil.
                def first(selector)
                  find(selector).first
                end

                # Returns every match of the CSS selector under this element as Nodes.
                def find(selector)
                  @element.css(selector).map { |match| Node.new(match) }
                end

                # Returns the direct element children as Nodes. With a selector only the
                # matches of that selector whose parent is this element are kept.
                def children(selector = nil)
                  candidates = selector ? @element.css(selector) : @element.children

                  direct = candidates.select do |candidate|
                    candidate.parent == @element && candidate.is_a?(Nokogiri::XML::Element)
                  end

                  direct.map { |child| Node.new(child) }
                end

                # Returns the parent wrapped in a Node, or nil when the parent is not an
                # element.
                def parent
                  owner = @element.parent
                  Node.new(owner) if owner.is_a?(Nokogiri::XML::Element)
                end

                # True when #parent would return a Node.
                def have_parent?
                  !parent.nil?
                end

                # Returns all element ancestors, nearest first. With a selector only the
                # ancestors whose element `matches?` it are returned.
                def parents(selector = nil)
                  ancestors = []
                  current = self
                  while (current = current.parent)
                    ancestors << current
                  end

                  return ancestors unless selector

                  ancestors.select { |ancestor| ancestor.element.matches?(selector) }
                end

                # Returns the other element children of this node's parent. A node
                # without an element parent has no siblings, so an empty array is
                # returned instead of failing on nil.
                def siblings
                  container = parent
                  return [] unless container

                  container.children.reject { |node| node.element == element }
                end

                # Fetches the page this node links to and returns its root Node.
                # Returns nil when the node has no href; attribute readers return ""
                # for a missing attribute, so emptiness (not truthiness) is checked.
                def follow
                  return nil if self.href.empty?

                  Skyscraper::fetch(self.uri)
                end

                # Inner HTML of the element.
                def html
                  @element.children.to_html
                end

                # The HTML class attribute as a String ("" when absent). Use
                # #original_class for the Ruby class.
                def class
                  @element.attribute("class").to_s
                end

                # Downloads the resource referenced by this node; see Resource#download
                # for the accepted options.
                def download(options = {})
                  Resource.new(self).download(options)
                end

                # Resolves the href through the owning document's path object.
                def uri
                  @element.document.path.full_path_for(self.href)
                end

                # Exposes HTML attributes as zero-argument readers. A missing attribute
                # yields "". Calls with arguments and names that cannot be attributes
                # (see #attribute_name?) fall through to the default NoMethodError, so
                # implicit conversions such as to_ary/to_str are no longer answered
                # with a String.
                def method_missing(name, *args, &block)
                  return super unless args.empty? && attribute_name?(name)

                  @element.attribute(name.to_s).to_s
                end

                # Reports true only for attributes actually present on the element.
                def respond_to_missing?(name, include_private = false)
                  present = attribute_name?(name) &&
                            @element.respond_to?(:attribute) &&
                            !@element.attribute(name.to_s).nil?
                  present || super
                end

                # Text content with surrounding whitespace removed.
                def text
                  @element.content.to_s.strip
                end

                # Tag name of the element.
                def tag
                  @element.name
                end

                private

                # Names starting with "to_" (conversion protocol) or ending in ?, ! or =
                # are never treated as HTML attribute readers.
                def attribute_name?(name)
                  label = name.to_s
                  !label.start_with?("to_") && label !~ /[?!=]\z/
                end
              end
            end
         | 
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            require "fileutils"

            module Skyscraper
              # Copies the file referenced by a node's href (or, failing that, its src)
              # to a destination built from a path template.
              class Resource
                # node - object answering href, src and element; the element's document
                #        path object is asked to resolve the reference.
                #
                # Raises ArgumentError when the node has neither an href nor a src.
                def initialize(node)
                  @node = node
                  @path = extract_path_from_node(@node)
                end

                # Stores the resource and returns the path it was written to.
                #
                # options[:file_name] - overrides the name taken from the source path.
                # options[:path]      - destination template; defaults to
                #                       Skyscraper.config.download_path. The markers
                #                       :file_name and :sequence are substituted.
                def download(options = {})
                  name          = options[:file_name] || @path.file_name
                  template      = options[:path] || Skyscraper.config.download_path
                  new_file_path = replace_path_variables(template, name)

                  # NOTE(review): Kernel#open treats a string starting with "|" as a
                  # command; confirm full_path can never be attacker-controlled.
                  source = open(@path.full_path)
                  begin
                    if source.respond_to?(:path) && source.path
                      copy source.path, new_file_path
                    else
                      # Presumably an in-memory IO (e.g. what open-uri may hand back
                      # for small responses); it has no file to copy, so write it out.
                      create_path_if_not_exists new_file_path
                      File.open(new_file_path, "wb") { |file| file.write(source.read) }
                    end
                  ensure
                    source.close if source.respond_to?(:close)
                  end

                  new_file_path
                end

                private

                # Copies +from+ to +to+ without going through a shell, so paths with
                # spaces or shell metacharacters are handled literally.
                def copy(from, to)
                  create_path_if_not_exists to
                  FileUtils.cp from, to
                end

                # Ensures the directory that will hold +path+ exists. A path ending in
                # "/" is itself the directory; otherwise its parent directory is used.
                def create_path_if_not_exists(path)
                  directory = path.end_with?("/") ? path : File.dirname(path)
                  FileUtils.mkdir_p directory
                end

                # Substitutes :file_name and :sequence in a copy of +path+. Block-form
                # gsub inserts the values literally (backslashes in a file name are not
                # read as back-references). The sequence number is computed once and
                # only when the template actually contains :sequence.
                def replace_path_variables(path, name)
                  with_name = path.gsub(/:file_name/) { name }
                  sequence  = nil
                  with_name.gsub(/:sequence/) { sequence ||= get_sequence_number_for(with_name) }
                end

                # Returns, as a String, one more than the highest all-digit entry in the
                # directory preceding :sequence, or "1" when that directory is missing
                # or holds no numbered entry. Entries are compared numerically.
                def get_sequence_number_for(path)
                  base = path.split(":sequence")[0]
                  return "1" unless File.directory?(base)

                  numbers = Dir.entries(base).select { |entry| entry =~ /\A\d+\z/ }.map(&:to_i)
                  (numbers.max.to_i + 1).to_s
                end

                # Picks the node's href, falling back to src, and resolves it through
                # the document's path object. Blank values count as missing.
                def extract_path_from_node(node)
                  reference = [node.href, node.src].find do |value|
                    !value.to_s.strip.empty?
                  end
                  raise ArgumentError, "no href no src" unless reference

                  node.element.document.path.path_for(reference)
                end
              end
            end
         | 
    
        data/lib/skyscraper/version.rb
    CHANGED
    
    
    
        data/skyscraper.gemspec
    CHANGED
    
    | @@ -4,8 +4,8 @@ require File.expand_path('../lib/skyscraper/version', __FILE__) | |
| 4 4 | 
             
            Gem::Specification.new do |gem|
         | 
| 5 5 | 
             
              gem.authors       = ["Adam Dratwinski"]
         | 
| 6 6 | 
             
              gem.email         = ["arboooz@gmail.com"]
         | 
| 7 | 
            -
              gem.summary       = %q{ | 
| 8 | 
            -
              gem.description   = %q{ | 
| 7 | 
            +
              gem.summary       = %q{Easy to use DSL that helps scraping data from websites}
         | 
| 8 | 
            +
              gem.description   = %q{Easy to use DSL that helps scraping data from websites. Thanks to it, writing web crawlers would be very fast and intuitive. Traversing through html nodes and fetching all of the HTML attributes, would be possible. Just like in jQuery - you will find methods like parent, children, first, find, siblings etc. Furthermore, you are able to download images, web pages, and store all content in the database. Please visit my Github account for more details.}
         | 
| 9 9 | 
             
              gem.homepage      = "https://github.com/boooz/skyscraper"
         | 
| 10 10 |  | 
| 11 11 | 
             
              gem.files         = `git ls-files`.split($\)
         | 
| @@ -1,2 +1,87 @@ | |
| 1 1 | 
             
            # Specs for Skyscraper::Node: attribute access, link following, downloading
            # and DOM traversal, driven by the HTML fixtures returned by path_to.
            describe Skyscraper::Node do
              describe "when is initialized" do
                before(:each) do
                  @node = Skyscraper::fetch(path_to("skyscraper-node.html")).first("div.item")
                end

                it "should returns html code" do
                  @node.html.should include "<strong class=\"name\">Name value</strong>"
                end

                it "should returns class name" do
                  @node.class.should == "item"
                end

                it "should be auto converted to string with stripped tags" do
                  @node.text.should == "Name value"
                end
              end

              it "should follow links" do
                Skyscraper::fetch(path_to("skyscraper-node.html")).first("li a").follow.first("h1").text.should == "Hello from A"
              end

              it "should deep follow links" do
                Skyscraper::fetch(path_to("skyscraper-node.html")).first("li a").follow.first("a").follow.first("h1").text.should == "Hello from B"
              end

              it "should download page" do
                remove_test_directory
                Skyscraper.config.download_path = "/tmp/skyscraper_test/nodes/:file_name"
                file = Skyscraper::fetch(path_to("skyscraper-node.html")).first("li a").follow.first("a").download
                # File.exist? is the supported spelling; File.exists? is gone in Ruby 3.2+.
                File.exist?(file).should == true
              end

              describe "traversing" do
                before(:each) do
                  @node = Skyscraper::fetch(path_to("skyscraper-node-traversing.html")).first(".menu")
                end

                it "should find descendands items" do
                  result = @node.find("li")
                  result.length.should == 5
                  result.map(&:text).should include "Item 4 1"
                end

                it "should returns children of element with selector" do
                  node = Skyscraper::fetch(path_to("skyscraper-node-traversing.html")).first("#parent-3")
                  node.children(".a").length.should == 4
                  node.children(".b").length.should == 2
                end

                it "should returns children of element without selector" do
                  result = @node.children
                  result.length.should == 4
                  result.map(&:to_s).should_not include "Item 4 1"
                end

                it "should returns first element" do
                  @node.first("li").class.should == "item-1"
                end

                it "should returns parent of item" do
                  @node.parent.class.should == "parent-2"
                end

                it "should tells if element have parent" do
                  @node.have_parent?.should == true
                  @node.parents("html").first.have_parent?.should == false
                end

                it "should returns parents of item" do
                  @node.parents.length.should == 4
                end

                it "should returns parents of item matched by selector" do
                  @node.parents("div").length.should == 2
                end

                it "should returns siblings of item" do
                  @node.first(".item-3").siblings.length.should == 3
                end

                it "should returns node tag" do
                  @node.tag.should == "ul"
                end
              end
            end
         | 
| 87 | 
            +
             | 
| @@ -1,12 +1,12 @@ | |
| 1 | 
            -
            describe Skyscraper:: | 
| 1 | 
            +
            describe Skyscraper::Resource do
         | 
| 2 2 | 
             
              def should_download_resource_to node, path, options = {}
         | 
| 3 | 
            -
                resource = Skyscraper:: | 
| 3 | 
            +
                resource = Skyscraper::Resource.new(node)
         | 
| 4 4 | 
             
                resource.download(options).should == path
         | 
| 5 5 | 
             
              end
         | 
| 6 6 |  | 
| 7 7 | 
             
              before(:all) do
         | 
| 8 8 | 
             
                Skyscraper.config.download_path = "/tmp/skyscraper_test/:sequence/:file_name"
         | 
| 9 | 
            -
                @node = Skyscraper::fetch(path_to("skyscraper- | 
| 9 | 
            +
                @node = Skyscraper::fetch(path_to("skyscraper-resource.html")).first("a")
         | 
| 10 10 | 
             
              end
         | 
| 11 11 |  | 
| 12 12 | 
             
              before(:each) do
         | 
| @@ -15,29 +15,29 @@ describe Skyscraper::Node::Resource do | |
| 15 15 |  | 
| 16 16 | 
             
              it "should create path if not exists when downloaded" do
         | 
| 17 17 | 
             
                File.directory?("/tmp/skyscraper_test/1").should == false
         | 
| 18 | 
            -
                Skyscraper:: | 
| 18 | 
            +
                Skyscraper::Resource.new(@node)
         | 
| 19 19 | 
             
                File.directory?("/tmp/skyscraper_test/1").should == false
         | 
| 20 | 
            -
                Skyscraper:: | 
| 20 | 
            +
                Skyscraper::Resource.new(@node).download
         | 
| 21 21 | 
             
                File.directory?("/tmp/skyscraper_test/1").should == true
         | 
| 22 22 | 
             
              end
         | 
| 23 23 |  | 
| 24 24 | 
             
              it "should not fail if path already exists" do
         | 
| 25 | 
            -
                Skyscraper:: | 
| 25 | 
            +
                Skyscraper::Resource.new(@node).download path: "/tmp/skyscraper_test/some_directory/:file_name"
         | 
| 26 26 | 
             
                File.directory?("/tmp/skyscraper_test/some_directory").should == true
         | 
| 27 | 
            -
                Skyscraper:: | 
| 27 | 
            +
                Skyscraper::Resource.new(@node).download
         | 
| 28 28 | 
             
                File.directory?("/tmp/skyscraper_test/some_directory").should == true
         | 
| 29 29 | 
             
              end
         | 
| 30 30 |  | 
| 31 31 | 
             
              it "should have file name" do
         | 
| 32 | 
            -
                resource = Skyscraper:: | 
| 33 | 
            -
                resource.download.should == "/tmp/skyscraper_test/1/skyscraper- | 
| 32 | 
            +
                resource = Skyscraper::Resource.new(@node)
         | 
| 33 | 
            +
                resource.download.should == "/tmp/skyscraper_test/1/skyscraper-resource-b.html"
         | 
| 34 34 | 
             
              end
         | 
| 35 35 |  | 
| 36 36 | 
             
              it "should create path with :sequence variable" do
         | 
| 37 37 | 
             
                download_to = "/tmp/skyscraper_test/sequences/:sequence/:file_name"
         | 
| 38 | 
            -
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/1/skyscraper- | 
| 39 | 
            -
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/2/skyscraper- | 
| 40 | 
            -
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/3/skyscraper- | 
| 38 | 
            +
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/1/skyscraper-resource-b.html", path: download_to
         | 
| 39 | 
            +
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/2/skyscraper-resource-b.html", path: download_to
         | 
| 40 | 
            +
                should_download_resource_to @node, "/tmp/skyscraper_test/sequences/3/skyscraper-resource-b.html", path: download_to
         | 
| 41 41 | 
             
              end
         | 
| 42 42 |  | 
| 43 43 | 
             
              it "should create custom file name if provided" do
         | 
| @@ -46,13 +46,13 @@ describe Skyscraper::Node::Resource do | |
| 46 46 | 
             
              end
         | 
| 47 47 |  | 
| 48 48 | 
             
              it "should download resource" do
         | 
| 49 | 
            -
                Skyscraper:: | 
| 50 | 
            -
                File.exists?("/tmp/skyscraper_test/1/skyscraper- | 
| 49 | 
            +
                Skyscraper::Resource.new(@node).download
         | 
| 50 | 
            +
                File.exists?("/tmp/skyscraper_test/1/skyscraper-resource-b.html").should == true
         | 
| 51 51 | 
             
              end
         | 
| 52 52 |  | 
| 53 53 | 
             
              it "should download image" do
         | 
| 54 | 
            -
                image_node = Skyscraper::fetch(path_to("skyscraper- | 
| 55 | 
            -
                Skyscraper:: | 
| 56 | 
            -
                File.exists?("/tmp/skyscraper_test/1/skyscraper- | 
| 54 | 
            +
                image_node = Skyscraper::fetch(path_to("skyscraper-resource.html")).first("img")
         | 
| 55 | 
            +
                Skyscraper::Resource.new(image_node).download
         | 
| 56 | 
            +
                File.exists?("/tmp/skyscraper_test/1/skyscraper-resource-image.png").should == true
         | 
| 57 57 | 
             
              end
         | 
| 58 58 | 
             
            end
         | 
| @@ -69,7 +69,7 @@ describe Skyscraper::Results do | |
| 69 69 | 
             
                  @call_count = 0
         | 
| 70 70 | 
             
                  callback = proc do |result, page|  
         | 
| 71 71 | 
             
                    result.should be_an_instance_of(Hash)
         | 
| 72 | 
            -
                    page.should be_an_instance_of(Skyscraper::Node | 
| 72 | 
            +
                    page.should be_an_instance_of(Skyscraper::Node)
         | 
| 73 73 | 
             
                    @call_count += 1
         | 
| 74 74 | 
             
                  end
         | 
| 75 75 |  | 
| @@ -16,7 +16,7 @@ describe Skyscraper do | |
| 16 16 | 
             
              end
         | 
| 17 17 |  | 
| 18 18 | 
             
              it "should fetch remote page" do
         | 
| 19 | 
            -
                Skyscraper::fetch("http://google.com").should be_an Skyscraper::Node | 
| 19 | 
            +
                Skyscraper::fetch("http://google.com").should be_an Skyscraper::Node
         | 
| 20 20 | 
             
              end
         | 
| 21 21 |  | 
| 22 22 | 
             
              it "static method fetch should works" do
         | 
| 
            File without changes
         | 
    
        data/spec/test_files/{skyscraper-node-base-traversing.html → skyscraper-node-traversing.html}
    RENAMED
    
    | 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: skyscraper
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.5
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,11 +9,11 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-05- | 
| 12 | 
            +
            date: 2012-05-21 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rspec
         | 
| 16 | 
            -
              requirement: & | 
| 16 | 
            +
              requirement: &77062930 !ruby/object:Gem::Requirement
         | 
| 17 17 | 
             
                none: false
         | 
| 18 18 | 
             
                requirements:
         | 
| 19 19 | 
             
                - - ! '>='
         | 
| @@ -21,10 +21,10 @@ dependencies: | |
| 21 21 | 
             
                    version: '0'
         | 
| 22 22 | 
             
              type: :development
         | 
| 23 23 | 
             
              prerelease: false
         | 
| 24 | 
            -
              version_requirements: * | 
| 24 | 
            +
              version_requirements: *77062930
         | 
| 25 25 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 26 26 | 
             
              name: rake
         | 
| 27 | 
            -
              requirement: & | 
| 27 | 
            +
              requirement: &77124830 !ruby/object:Gem::Requirement
         | 
| 28 28 | 
             
                none: false
         | 
| 29 29 | 
             
                requirements:
         | 
| 30 30 | 
             
                - - ! '>='
         | 
| @@ -32,10 +32,10 @@ dependencies: | |
| 32 32 | 
             
                    version: '0'
         | 
| 33 33 | 
             
              type: :development
         | 
| 34 34 | 
             
              prerelease: false
         | 
| 35 | 
            -
              version_requirements: * | 
| 35 | 
            +
              version_requirements: *77124830
         | 
| 36 36 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 37 37 | 
             
              name: nokogiri
         | 
| 38 | 
            -
              requirement: & | 
| 38 | 
            +
              requirement: &77122840 !ruby/object:Gem::Requirement
         | 
| 39 39 | 
             
                none: false
         | 
| 40 40 | 
             
                requirements:
         | 
| 41 41 | 
             
                - - ! '>='
         | 
| @@ -43,10 +43,10 @@ dependencies: | |
| 43 43 | 
             
                    version: '0'
         | 
| 44 44 | 
             
              type: :runtime
         | 
| 45 45 | 
             
              prerelease: false
         | 
| 46 | 
            -
              version_requirements: * | 
| 46 | 
            +
              version_requirements: *77122840
         | 
| 47 47 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 48 48 | 
             
              name: actionpack
         | 
| 49 | 
            -
              requirement: & | 
| 49 | 
            +
              requirement: &77119590 !ruby/object:Gem::Requirement
         | 
| 50 50 | 
             
                none: false
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 52 | 
             
                - - ! '>='
         | 
| @@ -54,12 +54,13 @@ dependencies: | |
| 54 54 | 
             
                    version: '0'
         | 
| 55 55 | 
             
              type: :runtime
         | 
| 56 56 | 
             
              prerelease: false
         | 
| 57 | 
            -
              version_requirements: * | 
| 58 | 
            -
            description:  | 
| 59 | 
            -
               | 
| 60 | 
            -
               | 
| 61 | 
            -
               | 
| 62 | 
            -
               | 
| 57 | 
            +
              version_requirements: *77119590
         | 
| 58 | 
            +
            description: Easy to use DSL that helps scraping data from websites. Thanks to it,
         | 
| 59 | 
            +
              writing web crawlers would be very fast and intuitive. Traversing through html nodes
         | 
| 60 | 
            +
              and fetching all of the HTML attributes, would be possible. Just like in jQuery
         | 
| 61 | 
            +
              - you will find methods like parent, children, first, find, siblings etc. Furthermore,
         | 
| 62 | 
            +
              you are able to download images, web pages, and store all content in the database.
         | 
| 63 | 
            +
              Please visit my Github account for more details.
         | 
| 63 64 | 
             
            email:
         | 
| 64 65 | 
             
            - arboooz@gmail.com
         | 
| 65 66 | 
             
            executables: []
         | 
| @@ -78,13 +79,12 @@ files: | |
| 78 79 | 
             
            - lib/skyscraper/document.rb
         | 
| 79 80 | 
             
            - lib/skyscraper/field.rb
         | 
| 80 81 | 
             
            - lib/skyscraper/node.rb
         | 
| 81 | 
            -
            - lib/skyscraper/node/base.rb
         | 
| 82 | 
            -
            - lib/skyscraper/node/resource.rb
         | 
| 83 82 | 
             
            - lib/skyscraper/pages.rb
         | 
| 84 83 | 
             
            - lib/skyscraper/path.rb
         | 
| 85 84 | 
             
            - lib/skyscraper/path/base.rb
         | 
| 86 85 | 
             
            - lib/skyscraper/path/local.rb
         | 
| 87 86 | 
             
            - lib/skyscraper/path/remote.rb
         | 
| 87 | 
            +
            - lib/skyscraper/resource.rb
         | 
| 88 88 | 
             
            - lib/skyscraper/results.rb
         | 
| 89 89 | 
             
            - lib/skyscraper/version.rb
         | 
| 90 90 | 
             
            - skyscraper.gemspec
         | 
| @@ -92,11 +92,10 @@ files: | |
| 92 92 | 
             
            - spec/skyscraper/skyscraper/config_spec.rb
         | 
| 93 93 | 
             
            - spec/skyscraper/skyscraper/document_spec.rb
         | 
| 94 94 | 
             
            - spec/skyscraper/skyscraper/field_spec.rb
         | 
| 95 | 
            -
            - spec/skyscraper/skyscraper/node/base_spec.rb
         | 
| 96 | 
            -
            - spec/skyscraper/skyscraper/node/resource_spec.rb
         | 
| 97 95 | 
             
            - spec/skyscraper/skyscraper/node_spec.rb
         | 
| 98 96 | 
             
            - spec/skyscraper/skyscraper/pages_spec.rb
         | 
| 99 97 | 
             
            - spec/skyscraper/skyscraper/path_spec.rb
         | 
| 98 | 
            +
            - spec/skyscraper/skyscraper/resource_spec.rb
         | 
| 100 99 | 
             
            - spec/skyscraper/skyscraper/results_spec.rb
         | 
| 101 100 | 
             
            - spec/skyscraper/skyscraper_spec.rb
         | 
| 102 101 | 
             
            - spec/spec_helper.rb
         | 
| @@ -108,14 +107,14 @@ files: | |
| 108 107 | 
             
            - spec/test_files/skyscraper-fetch-2.html
         | 
| 109 108 | 
             
            - spec/test_files/skyscraper-fetch.html
         | 
| 110 109 | 
             
            - spec/test_files/skyscraper-field.html
         | 
| 111 | 
            -
            - spec/test_files/skyscraper-node- | 
| 112 | 
            -
            - spec/test_files/skyscraper-node- | 
| 113 | 
            -
            - spec/test_files/skyscraper-node- | 
| 114 | 
            -
            - spec/test_files/skyscraper-node | 
| 115 | 
            -
            - spec/test_files/skyscraper-node-resource-b.html
         | 
| 116 | 
            -
            - spec/test_files/skyscraper-node-resource-image.png
         | 
| 117 | 
            -
            - spec/test_files/skyscraper-node-resource.html
         | 
| 110 | 
            +
            - spec/test_files/skyscraper-node-a.html
         | 
| 111 | 
            +
            - spec/test_files/skyscraper-node-b.html
         | 
| 112 | 
            +
            - spec/test_files/skyscraper-node-traversing.html
         | 
| 113 | 
            +
            - spec/test_files/skyscraper-node.html
         | 
| 118 114 | 
             
            - spec/test_files/skyscraper-pages.html
         | 
| 115 | 
            +
            - spec/test_files/skyscraper-resource-b.html
         | 
| 116 | 
            +
            - spec/test_files/skyscraper-resource-image.png
         | 
| 117 | 
            +
            - spec/test_files/skyscraper-resource.html
         | 
| 119 118 | 
             
            - spec/test_files/skyscraper.html
         | 
| 120 119 | 
             
            homepage: https://github.com/boooz/skyscraper
         | 
| 121 120 | 
             
            licenses: []
         | 
| @@ -140,17 +139,16 @@ rubyforge_project: | |
| 140 139 | 
             
            rubygems_version: 1.8.15
         | 
| 141 140 | 
             
            signing_key: 
         | 
| 142 141 | 
             
            specification_version: 3
         | 
| 143 | 
            -
            summary:  | 
| 142 | 
            +
            summary: Easy to use DSL that helps scraping data from websites
         | 
| 144 143 | 
             
            test_files:
         | 
| 145 144 | 
             
            - spec/skyscraper/skyscraper/base_spec.rb
         | 
| 146 145 | 
             
            - spec/skyscraper/skyscraper/config_spec.rb
         | 
| 147 146 | 
             
            - spec/skyscraper/skyscraper/document_spec.rb
         | 
| 148 147 | 
             
            - spec/skyscraper/skyscraper/field_spec.rb
         | 
| 149 | 
            -
            - spec/skyscraper/skyscraper/node/base_spec.rb
         | 
| 150 | 
            -
            - spec/skyscraper/skyscraper/node/resource_spec.rb
         | 
| 151 148 | 
             
            - spec/skyscraper/skyscraper/node_spec.rb
         | 
| 152 149 | 
             
            - spec/skyscraper/skyscraper/pages_spec.rb
         | 
| 153 150 | 
             
            - spec/skyscraper/skyscraper/path_spec.rb
         | 
| 151 | 
            +
            - spec/skyscraper/skyscraper/resource_spec.rb
         | 
| 154 152 | 
             
            - spec/skyscraper/skyscraper/results_spec.rb
         | 
| 155 153 | 
             
            - spec/skyscraper/skyscraper_spec.rb
         | 
| 156 154 | 
             
            - spec/spec_helper.rb
         | 
| @@ -162,12 +160,12 @@ test_files: | |
| 162 160 | 
             
            - spec/test_files/skyscraper-fetch-2.html
         | 
| 163 161 | 
             
            - spec/test_files/skyscraper-fetch.html
         | 
| 164 162 | 
             
            - spec/test_files/skyscraper-field.html
         | 
| 165 | 
            -
            - spec/test_files/skyscraper-node- | 
| 166 | 
            -
            - spec/test_files/skyscraper-node- | 
| 167 | 
            -
            - spec/test_files/skyscraper-node- | 
| 168 | 
            -
            - spec/test_files/skyscraper-node | 
| 169 | 
            -
            - spec/test_files/skyscraper-node-resource-b.html
         | 
| 170 | 
            -
            - spec/test_files/skyscraper-node-resource-image.png
         | 
| 171 | 
            -
            - spec/test_files/skyscraper-node-resource.html
         | 
| 163 | 
            +
            - spec/test_files/skyscraper-node-a.html
         | 
| 164 | 
            +
            - spec/test_files/skyscraper-node-b.html
         | 
| 165 | 
            +
            - spec/test_files/skyscraper-node-traversing.html
         | 
| 166 | 
            +
            - spec/test_files/skyscraper-node.html
         | 
| 172 167 | 
             
            - spec/test_files/skyscraper-pages.html
         | 
| 168 | 
            +
            - spec/test_files/skyscraper-resource-b.html
         | 
| 169 | 
            +
            - spec/test_files/skyscraper-resource-image.png
         | 
| 170 | 
            +
            - spec/test_files/skyscraper-resource.html
         | 
| 173 171 | 
             
            - spec/test_files/skyscraper.html
         | 
    
        data/lib/skyscraper/node/base.rb
    DELETED
    
    | @@ -1,103 +0,0 @@ | |
| 1 | 
            -
            module Skyscraper
         | 
| 2 | 
            -
              module Node
         | 
| 3 | 
            -
                class Base
         | 
| 4 | 
            -
                  alias :original_class :class
         | 
| 5 | 
            -
             | 
| 6 | 
            -
                  attr_accessor :element
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                  def initialize element
         | 
| 9 | 
            -
                    @element = element
         | 
| 10 | 
            -
                  end
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                  def first selector
         | 
| 13 | 
            -
                    self.find(selector).first
         | 
| 14 | 
            -
                  end
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                  def find selector
         | 
| 17 | 
            -
                    @element.css(selector).map do |element|
         | 
| 18 | 
            -
                      Base.new(element)
         | 
| 19 | 
            -
                    end
         | 
| 20 | 
            -
                  end
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                  def children selector = nil
         | 
| 23 | 
            -
                    if selector
         | 
| 24 | 
            -
                      children = @element.css(selector)
         | 
| 25 | 
            -
                    else
         | 
| 26 | 
            -
                      children = @element.children
         | 
| 27 | 
            -
                    end
         | 
| 28 | 
            -
             | 
| 29 | 
            -
                    children.select do |element|
         | 
| 30 | 
            -
                      element.parent == @element and element.is_a?(Nokogiri::XML::Element)
         | 
| 31 | 
            -
                    end.map do |child|
         | 
| 32 | 
            -
                      Base.new(child)
         | 
| 33 | 
            -
                    end 
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                  def parent
         | 
| 37 | 
            -
                    if @element.parent.is_a? Nokogiri::XML::Element
         | 
| 38 | 
            -
                      Base.new @element.parent
         | 
| 39 | 
            -
                    end
         | 
| 40 | 
            -
                  end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
                  def have_parent?
         | 
| 43 | 
            -
                    self.parent.present? 
         | 
| 44 | 
            -
                  end
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                  def parents selector = nil
         | 
| 47 | 
            -
                    node = self
         | 
| 48 | 
            -
                    parents = []
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                    while node.have_parent?
         | 
| 51 | 
            -
                      node = node.parent
         | 
| 52 | 
            -
                      parents << node
         | 
| 53 | 
            -
                    end
         | 
| 54 | 
            -
             | 
| 55 | 
            -
                    parents.select! do |item|
         | 
| 56 | 
            -
                      item.element.matches? selector
         | 
| 57 | 
            -
                    end if selector
         | 
| 58 | 
            -
             | 
| 59 | 
            -
                    parents
         | 
| 60 | 
            -
                  end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
                  def siblings
         | 
| 63 | 
            -
                    self.parent.children.select do |node|
         | 
| 64 | 
            -
                      node.element != self.element
         | 
| 65 | 
            -
                    end
         | 
| 66 | 
            -
                  end
         | 
| 67 | 
            -
             | 
| 68 | 
            -
                  def follow 
         | 
| 69 | 
            -
                    if self.href
         | 
| 70 | 
            -
                      Skyscraper::fetch(self.uri)
         | 
| 71 | 
            -
                    end
         | 
| 72 | 
            -
                  end
         | 
| 73 | 
            -
             | 
| 74 | 
            -
                  def html
         | 
| 75 | 
            -
                    @element.children.to_html
         | 
| 76 | 
            -
                  end
         | 
| 77 | 
            -
             | 
| 78 | 
            -
                  def class
         | 
| 79 | 
            -
                    @element.attribute("class").to_s
         | 
| 80 | 
            -
                  end
         | 
| 81 | 
            -
             | 
| 82 | 
            -
                  def download options = {}
         | 
| 83 | 
            -
                    Resource.new(self).download(options)
         | 
| 84 | 
            -
                  end
         | 
| 85 | 
            -
             | 
| 86 | 
            -
                  def uri
         | 
| 87 | 
            -
                    @element.document.path.full_path_for(self.href)
         | 
| 88 | 
            -
                  end
         | 
| 89 | 
            -
             | 
| 90 | 
            -
                  def method_missing name
         | 
| 91 | 
            -
                    @element.attribute(name.to_s).to_s
         | 
| 92 | 
            -
                  end
         | 
| 93 | 
            -
             | 
| 94 | 
            -
                  def text
         | 
| 95 | 
            -
                    @element.content.to_s.strip
         | 
| 96 | 
            -
                  end
         | 
| 97 | 
            -
             | 
| 98 | 
            -
                  def tag
         | 
| 99 | 
            -
            	@element.name
         | 
| 100 | 
            -
                  end
         | 
| 101 | 
            -
                end
         | 
| 102 | 
            -
              end
         | 
| 103 | 
            -
            end
         | 
| @@ -1,57 +0,0 @@ | |
| 1 | 
            -
            module Skyscraper
         | 
| 2 | 
            -
              module Node
         | 
| 3 | 
            -
                class Resource
         | 
| 4 | 
            -
                  def initialize node
         | 
| 5 | 
            -
                    @node           = node
         | 
| 6 | 
            -
                    @path           = extract_path_from_node(@node)
         | 
| 7 | 
            -
                  end
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                  def download options = {}
         | 
| 10 | 
            -
                    @name          = options[:file_name] || @path.file_name
         | 
| 11 | 
            -
                    @new_file_path = replace_path_variables(options[:path] || Skyscraper.config.download_path)
         | 
| 12 | 
            -
                    @temp_file     = open(@path.full_path)
         | 
| 13 | 
            -
             | 
| 14 | 
            -
                    copy @temp_file.path, @new_file_path
         | 
| 15 | 
            -
                    @new_file_path
         | 
| 16 | 
            -
                  end
         | 
| 17 | 
            -
             | 
| 18 | 
            -
                  private 
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                  def copy from, to 
         | 
| 21 | 
            -
                    create_path_if_not_exists to
         | 
| 22 | 
            -
                    `cp #{from} #{to}`
         | 
| 23 | 
            -
                  end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                  def create_path_if_not_exists path
         | 
| 26 | 
            -
                    `mkdir -p #{path}` unless File.directory?(path)
         | 
| 27 | 
            -
                  end
         | 
| 28 | 
            -
             | 
| 29 | 
            -
                  def replace_path_variables path
         | 
| 30 | 
            -
                    new_path = path.dup
         | 
| 31 | 
            -
                    new_path.gsub! /:file_name/, @name
         | 
| 32 | 
            -
                    new_path.gsub! /:sequence/, get_sequence_number_for(new_path)
         | 
| 33 | 
            -
                    new_path
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                  def get_sequence_number_for path
         | 
| 37 | 
            -
                    new_path = path.split(":sequence")[0]
         | 
| 38 | 
            -
                    if File.directory?(new_path)
         | 
| 39 | 
            -
                      entries = Dir.entries(new_path).select { |i| i =~ /^\d+$/ } || []
         | 
| 40 | 
            -
                      last = entries.sort.last.to_i
         | 
| 41 | 
            -
                      last += 1
         | 
| 42 | 
            -
                      last.to_s
         | 
| 43 | 
            -
                    else
         | 
| 44 | 
            -
                      "1"
         | 
| 45 | 
            -
                    end
         | 
| 46 | 
            -
                  end
         | 
| 47 | 
            -
             | 
| 48 | 
            -
                  def extract_path_from_node node
         | 
| 49 | 
            -
                    if href_or_src = node.href.present? ? node.href : node.src
         | 
| 50 | 
            -
                      node.element.document.path.path_for(href_or_src)
         | 
| 51 | 
            -
                    else
         | 
| 52 | 
            -
                      throw Exception.new("no href no src")
         | 
| 53 | 
            -
                    end
         | 
| 54 | 
            -
                  end
         | 
| 55 | 
            -
                end
         | 
| 56 | 
            -
              end
         | 
| 57 | 
            -
            end
         | 
| @@ -1,87 +0,0 @@ | |
| 1 | 
            -
            describe Skyscraper::Node::Base do
         | 
| 2 | 
            -
              describe "when is initialized" do
         | 
| 3 | 
            -
                before(:each) do
         | 
| 4 | 
            -
                  @node = Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("div.item")
         | 
| 5 | 
            -
                end
         | 
| 6 | 
            -
                
         | 
| 7 | 
            -
                it "should returns html code" do
         | 
| 8 | 
            -
                  @node.html.should include "<strong class=\"name\">Name value</strong>"
         | 
| 9 | 
            -
                end
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                it "should returns class name" do
         | 
| 12 | 
            -
                  @node.class.should == "item"
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                it "should be auto converted to string with stripped tags" do
         | 
| 16 | 
            -
                  @node.text.should == "Name value"
         | 
| 17 | 
            -
                end
         | 
| 18 | 
            -
              end
         | 
| 19 | 
            -
             | 
| 20 | 
            -
              it "should follow links" do
         | 
| 21 | 
            -
                Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("h1").text.should == "Hello from A"
         | 
| 22 | 
            -
              end
         | 
| 23 | 
            -
             | 
| 24 | 
            -
              it "should deep follow links" do
         | 
| 25 | 
            -
                Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("a").follow.first("h1").text.should == "Hello from B"
         | 
| 26 | 
            -
              end
         | 
| 27 | 
            -
             | 
| 28 | 
            -
              it "should download page" do
         | 
| 29 | 
            -
                remove_test_directory
         | 
| 30 | 
            -
                Skyscraper.config.download_path = "/tmp/skyscraper_test/nodes/:file_name"
         | 
| 31 | 
            -
                file = Skyscraper::fetch(path_to("skyscraper-node-base.html")).first("li a").follow.first("a").download
         | 
| 32 | 
            -
                File.exists?(file).should == true
         | 
| 33 | 
            -
              end
         | 
| 34 | 
            -
              describe "traversing" do
         | 
| 35 | 
            -
                before(:each) do
         | 
| 36 | 
            -
                  @node = Skyscraper::fetch(path_to("skyscraper-node-base-traversing.html")).first(".menu")
         | 
| 37 | 
            -
                end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                it "should find descendands items" do
         | 
| 40 | 
            -
                  result = @node.find("li")
         | 
| 41 | 
            -
                  result.length.should == 5
         | 
| 42 | 
            -
                  result.map(&:text).should include "Item 4 1"
         | 
| 43 | 
            -
                end
         | 
| 44 | 
            -
             | 
| 45 | 
            -
                it "should returns children of element with selector" do
         | 
| 46 | 
            -
                  node = Skyscraper::fetch(path_to("skyscraper-node-base-traversing.html")).first("#parent-3")
         | 
| 47 | 
            -
                  node.children(".a").length.should == 4
         | 
| 48 | 
            -
                  node.children(".b").length.should == 2
         | 
| 49 | 
            -
                end
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                it "should returns children of element without selector" do
         | 
| 52 | 
            -
                  result = @node.children
         | 
| 53 | 
            -
                  result.length.should == 4
         | 
| 54 | 
            -
                  result.map(&:to_s).should_not include "Item 4 1"
         | 
| 55 | 
            -
                end
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                it "should returns first element" do
         | 
| 58 | 
            -
                  @node.first("li").class.should == "item-1"
         | 
| 59 | 
            -
                end
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                it "should returns parent of item" do
         | 
| 62 | 
            -
                  @node.parent.class.should == "parent-2"
         | 
| 63 | 
            -
                end
         | 
| 64 | 
            -
             | 
| 65 | 
            -
                it "should tells if element have parent" do
         | 
| 66 | 
            -
                  @node.have_parent?.should == true
         | 
| 67 | 
            -
                  @node.parents("html").first.have_parent?.should == false
         | 
| 68 | 
            -
                end
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                it "should returns parents of item" do
         | 
| 71 | 
            -
                  @node.parents.length.should == 4
         | 
| 72 | 
            -
                end
         | 
| 73 | 
            -
                
         | 
| 74 | 
            -
                it "should returns parents of item matched by selector" do
         | 
| 75 | 
            -
                  @node.parents("div").length.should == 2
         | 
| 76 | 
            -
                end
         | 
| 77 | 
            -
             | 
| 78 | 
            -
                it "should returns siblings of item" do
         | 
| 79 | 
            -
                  @node.first(".item-3").siblings.length.should == 3
         | 
| 80 | 
            -
                end
         | 
| 81 | 
            -
             | 
| 82 | 
            -
                it "should returns node tag" do
         | 
| 83 | 
            -
                  @node.tag.should == "ul"
         | 
| 84 | 
            -
                end
         | 
| 85 | 
            -
              end
         | 
| 86 | 
            -
            end
         | 
| 87 | 
            -
             |