yasf 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ require "spec_helper"
2
+
3
+ describe Yasf::Scraper do
4
+
5
+ describe ".result" do
6
+ it "raises without arguments" do
7
+ lambda {
8
+ Yasf::Scraper::result
9
+ }.should raise_error(ArgumentError, "one symbol to return the value of this accessor")
10
+ end
11
+
12
+ it "returns a Proc when given two or more symbols" do
13
+ Yasf::Scraper::result(:one, :two, :three).should be_a(Proc)
14
+ end
15
+
16
+ end
17
+ end
18
+
@@ -0,0 +1,100 @@
1
+ require 'spec_helper'
2
+
3
+ describe Yasf do
4
+ it "should create a new scraper without a block" do
5
+ scraper = Yasf::define
6
+ scraper.should_not be_nil
7
+ scraper.ancestors.should be_include(Yasf::Scraper)
8
+ end
9
+
10
+ it "should raise a syntax error with a wrong selector" do
11
+ url = "http://www.fakeurl.com/basic_example"
12
+
13
+ scraper = Yasf::define do
14
+ scrape :title, "2h1.title", :title => :text
15
+ result :title
16
+ end
17
+
18
+ lambda {
19
+ title = scraper.extract_from(url)
20
+ }.should raise_error(Nokogiri::CSS::SyntaxError)
21
+
22
+ end
23
+
24
+ it "scrape basic example content" do
25
+ url = "http://www.fakeurl.com/basic_example"
26
+
27
+ scraper = Yasf::define do
28
+ scrape :title, "h1.title", :title => :text
29
+ result :title
30
+ end
31
+
32
+ title = scraper.extract_from(url)
33
+ title.should be_eql("Title 1")
34
+ end
35
+
36
+ it "scrape basic example content with a not-found selector should return nil" do
37
+ url = "http://www.fakeurl.com/basic_example"
38
+
39
+ scraper = Yasf::define do
40
+ scrape :title, "h1.not_found_element", :title => :text
41
+ result :title
42
+ end
43
+
44
+ title = scraper.extract_from(url)
45
+ title.should be_eql(nil)
46
+ end
47
+
48
+ it "scrape medium example content and result should be stored in array" do
49
+ url = "http://www.fakeurl.com/medium_example"
50
+ scraper = Yasf::define do
51
+ scrape :title, "h1.title", :'titles[]' => :text
52
+ result :titles
53
+ end
54
+ titles = scraper.extract_from(url)
55
+ titles.should be_is_a(Array)
56
+ end
57
+
58
+ it "scrape advanced example content and should have multiple results" do
59
+ url = "http://www.fakeurl.com/advanced_example"
60
+
61
+ title = Yasf::define do
62
+ scrape :title, "h1.title_under_table", :title => :text
63
+ scrape :links, "a.title_under_table", :link_name => :text, :link_url => :href
64
+
65
+ result :title, :link_name, :link_url
66
+ end
67
+ scraper = Yasf::define do
68
+ scrape :titles, "table tr.tr_with_title", :'titles[]' => title
69
+
70
+ result :titles
71
+ end
72
+
73
+ titles = scraper.extract_from(url)
74
+ titles.should be_is_a(Array)
75
+ titles.size.should be_equal(5)
76
+ titles[3].title.should be_eql("Title 4")
77
+ end
78
+
79
+ it "scrape thepiratebay.se should have multiple results" do
80
+ url = "http://thepiratebay.se/browse/101"
81
+
82
+ album = Yasf::define do
83
+ scrape :name, "td div.detName a", :name => :text
84
+ scrape :desc, "td font.detDesc", :desc => :text
85
+
86
+ result :name, :desc
87
+ end
88
+
89
+ scraper = Yasf::define do
90
+ scrape :albums, "table#searchResult tbody tr", :'albums[]' => album
91
+
92
+ result :albums
93
+ end
94
+ albums = scraper.extract_from(url)
95
+ albums.should be_is_a(Array)
96
+ albums.size.should be_equal(31)
97
+ albums[29].name.should be_eql("Maurice Ravel Complete Piano Works 2CD")
98
+ end
99
+
100
+ end
@@ -0,0 +1,24 @@
1
+ require 'yasf'
2
+
3
+ def read_fixture(path)
4
+ File.read(File.expand_path(File.join(File.dirname(__FILE__), "fixtures", path)))
5
+ end
6
+
7
+ FAKE_URLS = {
8
+ "http://www.fakeurl.com/basic_example" => "basic_example_response",
9
+ "http://www.fakeurl.com/medium_example" => "medium_example_response",
10
+ "http://www.fakeurl.com/advanced_example" => "advanced_example_response",
11
+ "http://thepiratebay.se/browse/101" => "thepiratebay_response.html"
12
+ }
13
+
14
+ begin
15
+ require 'fakeweb'
16
+
17
+ FakeWeb.allow_net_connect = false
18
+ FAKE_URLS.each do |url, response|
19
+ FakeWeb.register_uri(:get, url, :body => read_fixture(response))
20
+ end
21
+ rescue LoadError
22
+ puts "Could not load FakeWeb. Please run 'bundle install'"
23
+ end
24
+
data/yasf.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "yasf/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "yasf"
7
+ s.version = Yasf::VERSION
8
+ s.authors = ["Algonauti"]
9
+ s.email = ["devel@algonauti.com"]
10
+ s.homepage = "https://github.com/algonauti/yasf"
11
+ s.summary = %q{Uses DSL to write easy, maintainable HTML scraping rules.}
12
+ s.description = %q{HTML scraping to write maintainable rules to extract data from HTML content.}
13
+
14
+ s.files = `git ls-files`.split("\n")
15
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ s.require_paths = ["lib"]
18
+
19
+ s.add_dependency('nokogiri', '1.5.5')
20
+
21
+ s.add_development_dependency 'rake'
22
+ s.add_development_dependency 'rspec'
23
+ s.add_development_dependency 'fakeweb'
24
+
25
+ end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yasf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Algonauti
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &2151806380 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - =
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.5
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2151806380
25
+ - !ruby/object:Gem::Dependency
26
+ name: rake
27
+ requirement: &2151805860 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2151805860
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &2151805300 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2151805300
47
+ - !ruby/object:Gem::Dependency
48
+ name: fakeweb
49
+ requirement: &2151804720 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2151804720
58
+ description: HTML scraping to write maintainable rules to extract data from HTML content.
59
+ email:
60
+ - devel@algonauti.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - .gitignore
66
+ - .rvmrc
67
+ - Gemfile
68
+ - Gemfile.lock
69
+ - LICENSE.txt
70
+ - README.md
71
+ - Rakefile
72
+ - lib/yasf.rb
73
+ - lib/yasf/scraper.rb
74
+ - lib/yasf/version.rb
75
+ - spec/fixtures/.gitkeep
76
+ - spec/fixtures/advanced_example_response
77
+ - spec/fixtures/basic_example_response
78
+ - spec/fixtures/medium_example_response
79
+ - spec/fixtures/thepiratebay_response.html
80
+ - spec/lib/yasf/.gitkeep
81
+ - spec/lib/yasf/scraper_spec.rb
82
+ - spec/lib/yasf_spec.rb
83
+ - spec/spec_helper.rb
84
+ - yasf.gemspec
85
+ homepage: https://github.com/algonauti/yasf
86
+ licenses: []
87
+ post_install_message:
88
+ rdoc_options: []
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ requirements: []
104
+ rubyforge_project:
105
+ rubygems_version: 1.8.15
106
+ signing_key:
107
+ specification_version: 3
108
+ summary: Uses DSL to write easy, maintainable HTML scraping rules.
109
+ test_files:
110
+ - spec/fixtures/.gitkeep
111
+ - spec/fixtures/advanced_example_response
112
+ - spec/fixtures/basic_example_response
113
+ - spec/fixtures/medium_example_response
114
+ - spec/fixtures/thepiratebay_response.html
115
+ - spec/lib/yasf/.gitkeep
116
+ - spec/lib/yasf/scraper_spec.rb
117
+ - spec/lib/yasf_spec.rb
118
+ - spec/spec_helper.rb