content_scrapper 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Gyorgy Frivolt
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ = content_scrapper
2
+
3
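+ Gem for those who want to screen scrape only the content part of web pages, blogs or articles, without any other part (menu, header, footer, commercials, etc.). A DSL is provided for defining the scraping and sanitization rules.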
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Gyorgy Frivolt. See LICENSE for details.
@@ -0,0 +1,56 @@
1
# Rakefile for the content_scrapper gem: packaging (jeweler), unit tests,
# coverage (rcov) and RDoc generation.
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: when it cannot be loaded only a hint
# is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "content_scrapper"
    gem.summary = "Gem for those who want to screen scrap only the content part of web pages, blogs or articles."
    gem.description = "If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization."
    gem.email = "gyorgy.frivolt@gmail.com"
    gem.homepage = "http://github.com/fifigyuri/content_scrapper"
    gem.authors = ["Gyorgy Frivolt"]
    gem.add_development_dependency 'thoughtbot-shoulda', '>=2.10.2'
    gem.add_development_dependency 'mocha', '>=0.9.8'

    gem.add_dependency 'sanitize', '>=1.2.0'
    gem.add_dependency 'nokogiri', '>=1.4.1'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage. When rcov is missing, the :rcov task still exists but aborts with
# an installation hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it comes
# from Jeweler::Tasks -- confirm). Only depend on it when it exists, otherwise
# `rake test` fails with an unknown-task error whenever jeweler is not installed.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer the RDoc task shipped with rdoc itself; fall back to the legacy
# rake/rdoctask location for old Rake installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "content_scrapper #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
@@ -0,0 +1,3 @@
1
+
2
# Default sanitization settings handed to the scrapper: the listed elements
# and, for links, the href attribute.
sanitize_tags({
  :elements   => %w[p br b em i strong u a h1 h2 h3 li ol ul],
  :attributes => { 'a' => %w[href] }
})
@@ -0,0 +1,72 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{content_scrapper}
8
+ s.version = "0.0.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Gyorgy Frivolt"]
12
+ s.date = %q{2010-02-13}
13
+ s.description = %q{If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.}
14
+ s.email = %q{gyorgy.frivolt@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "config/content_scrapper.rb",
27
+ "content_scrapper.gemspec",
28
+ "lib/content_scrapper.rb",
29
+ "lib/content_scrapper/content_mapping.rb",
30
+ "lib/content_scrapper/feedzirra.rb",
31
+ "rails/init.rb",
32
+ "test/helper.rb",
33
+ "test/test_content_mapping.rb",
34
+ "test/test_content_scrapper.rb",
35
+ "test/test_pages/pretty.html",
36
+ "test/test_pages/twocontent.html",
37
+ "test/test_pages/ugly.html"
38
+ ]
39
+ s.homepage = %q{http://github.com/fifigyuri/content_scrapper}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{Gem for those who want to screen scrap only the content part of web pages, blogs or articles.}
44
+ s.test_files = [
45
+ "test/test_content_mapping.rb",
46
+ "test/test_content_scrapper.rb",
47
+ "test/helper.rb"
48
+ ]
49
+
50
+ if s.respond_to? :specification_version then
51
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
52
+ s.specification_version = 3
53
+
54
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
56
+ s.add_development_dependency(%q<mocha>, [">= 0.9.8"])
57
+ s.add_runtime_dependency(%q<sanitize>, [">= 1.2.0"])
58
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
59
+ else
60
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
61
+ s.add_dependency(%q<mocha>, [">= 0.9.8"])
62
+ s.add_dependency(%q<sanitize>, [">= 1.2.0"])
63
+ s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
64
+ end
65
+ else
66
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
67
+ s.add_dependency(%q<mocha>, [">= 0.9.8"])
68
+ s.add_dependency(%q<sanitize>, [">= 1.2.0"])
69
+ s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
70
+ end
71
+ end
72
+
@@ -0,0 +1,62 @@
1
+
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'sanitize'
5
+
6
+ require 'content_scrapper/content_mapping'
7
+
8
# Scrapes the content part of a web page. URL patterns are mapped to lists of
# XPath expressions (see #content_mapping) and the extracted markup is passed
# through Sanitize with the settings given to #sanitize_tags.
#
# Configuration is written in a small DSL that is evaluated in the context of
# the scrapper instance, either from a file or via instance_eval by the caller.
class ContentScrapper

  class << self
    # default_config_file:: path of the DSL file evaluated by every new
    #                       instance that is not given its own file; nil
    #                       disables file loading.
    # default::             the scrapper registered as the shared default.
    attr_accessor :default_config_file, :default

    # Builds a new scrapper (arguments are forwarded to .new), registers it as
    # the default and returns it.
    def create_new_default(*args)
      self.default = new(*args)
    end
  end

  # Must go through the writer on the class itself: a bare
  # `default_config_file = ...` inside `class << self` would only create a
  # local variable and leave the accessor nil.
  self.default_config_file = "#{File.dirname(__FILE__)}/../config/content_scrapper.rb"

  # Registers this instance as ContentScrapper.default and returns it.
  def set_as_default
    ContentScrapper.default = self
  end

  attr_accessor :content_mappings, :sanitize_settings

  # scrapper_config_file:: optional path of a DSL file to evaluate; when nil,
  #                        ContentScrapper.default_config_file is used. If
  #                        both are nil no file is read.
  #
  # Raises whatever File.read raises when the chosen file cannot be read.
  def initialize(scrapper_config_file = nil)
    @content_mappings = []
    config_file = scrapper_config_file || ContentScrapper.default_config_file
    instance_eval(File.read(config_file), config_file) unless config_file.nil?
  end

  # DSL: evaluates the block in the context of a new ContentMapping and
  # appends that mapping to content_mappings.
  def content_mapping(&block)
    new_mapping = ContentMapping.new
    new_mapping.instance_eval(&block)
    @content_mappings << new_mapping
  end

  # DSL: stores the settings later passed as second argument to Sanitize.clean.
  def sanitize_tags(settings)
    @sanitize_settings = settings
  end

  # Returns the sanitized content for +url+, or nil.
  #
  # Mappings are tried in definition order. The first mapping that matches the
  # URL, has at least one XPath and fetches without error decides the result:
  # its sanitized content, or nil when none of its XPaths matched. When
  # fetching, parsing or sanitizing raises, the error is handed to
  # #scrap_content_exception and the next mapping is tried.
  #
  # NOTE(review): Kernel.open is kept because the test suite stubs exactly
  # that call. Kernel.open treats strings starting with "|" as commands, so
  # only trusted URLs should reach this method.
  def scrap_content(url)
    content_mappings.each do |content_mapping|
      next unless content_mapping.matches_url?(url) && !content_mapping.content_xpaths_list.empty?

      begin
        doc = Nokogiri::HTML(Kernel.open(url))
        content = content_mapping.scrap_content(doc)
        return content.nil? ? nil : Sanitize.clean(content, sanitize_settings)
      rescue StandardError => e
        # StandardError instead of Exception: interrupts, SystemExit and
        # out-of-memory conditions must not be swallowed here.
        scrap_content_exception(e)
      end
    end
    nil
  end

  # Hook called with every error rescued in #scrap_content. Does nothing by
  # default; override to log or report.
  def scrap_content_exception(exception)
  end
end
@@ -0,0 +1,29 @@
1
+
2
# Associates one URL pattern with an ordered list of XPath expressions under
# which the content of matching pages is looked up.
class ContentMapping

  attr_reader :content_xpaths_list, :url_pattern_regexp

  # Starts with no XPaths and no URL pattern.
  def initialize
    @content_xpaths_list = []
  end

  # DSL: sets the pattern that URLs are matched against.
  def url_pattern(pattern)
    @url_pattern_regexp = pattern
  end

  # DSL: appends one more XPath to try; XPaths are tried in the order added.
  def content_at(content_xpath)
    content_xpaths_list << content_xpath
  end

  # Result of matching +url+ against the pattern with =~ (truthy on a match,
  # nil otherwise).
  def matches_url?(url)
    url =~ url_pattern_regexp
  end

  # Returns the nodes found by the first XPath that selects anything in
  # +doc+, joined by newlines; nil when no XPath selects anything.
  def scrap_content(doc)
    content_xpaths_list.each do |xpath|
      found = doc.xpath(xpath)
      next if found.count.zero?
      return found.to_a.join("\n")
    end
    nil
  end
end
@@ -0,0 +1,17 @@
1
+ # feedzirra entries are extended by methods for scrapping content
2
+ require 'feedzirra/feed_entry_utilities'
3
+
4
module Feedzirra
  module FeedEntryUtilities

    # Returns the content scrapped from the entry's URL, or the entry's
    # existing content (as a string) when the scrapper returns nil. The entry
    # itself is not modified.
    #
    # scrapper:: object responding to scrap_content(url); defaults to
    #            ContentScrapper.default.
    def scrap_content(scrapper = ContentScrapper.default)
      scrapper.scrap_content(self.url) || self.content.to_s
    end

    # Same as #scrap_content, but also stores the result as the entry's
    # content. Returns the new content.
    def scrap_content!(scrapper = ContentScrapper.default)
      # Explicit receiver is required: a bare `content = ...` would only
      # assign a local variable and leave the entry unchanged.
      self.content = scrap_content(scrapper)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'content_scrapper'
2
+
3
+ ContentScrapper.default_config_file = "#{RAILS_ROOT}/config/content_scrapper.yml"
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'content_scrapper'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,43 @@
1
+
2
+ require 'helper'
3
+ require 'mocha'
4
+
5
+ class TestContentMapping < Test::Unit::TestCase
6
+
7
+ context "on empty content mapping creation" do
8
+ setup do
9
+ @mapping = ContentMapping.new
10
+ @mapping.instance_eval do
11
+ url_pattern /^http:\/\/www\.matchme\.com\//
12
+ content_at '//div[@id="failing_content"]'
13
+ content_at '//div[@id="itext_content"]'
14
+ content_at '//div[@id="itext_second_content"]'
15
+ end
16
+ end
17
+ should "match the right urls" do
18
+ assert @mapping.matches_url?('http://www.matchme.com/')
19
+ end
20
+ should "not match the wrong urls" do
21
+ assert !@mapping.matches_url?('https://www.somethingelse.org/hfas')
22
+ end
23
+ context "scrapping content for a specific site" do
24
+ setup do
25
+ pretty_content = File.open("#{File.dirname(__FILE__)}/test_pages/pretty.html").read
26
+ @document = Nokogiri::HTML(pretty_content)
27
+ end
28
+ should "extract the content" do
29
+ assert_match(%r{<p><strong>This is a strong text</strong></p>}, @mapping.scrap_content(@document))
30
+ end
31
+ end
32
+ context "on document with two content parts" do
33
+ setup do
34
+ two_content = File.open("#{File.dirname(__FILE__)}/test_pages/twocontent.html").read
35
+ @document = Nokogiri::HTML(two_content)
36
+ end
37
+ should "evaluate the contents in the order as they were added" do
38
+ assert_match(%r{The first one is matched}, @mapping.scrap_content(@document))
39
+ end
40
+ end
41
+ end
42
+
43
+ end
@@ -0,0 +1,123 @@
1
+ require 'helper'
2
+ require 'mocha'
3
+
4
+ class TestContentScrapper < Test::Unit::TestCase
5
+
6
+ ContentScrapper.default_config_file = nil
7
+
8
+ context "on common setting" do
9
+ setup do
10
+ @scrapper = ContentScrapper.new
11
+ @scrapper.instance_eval do
12
+ content_mapping do
13
+ url_pattern /^http:\/\/www\.pretty\.url/
14
+ content_at '//div[@id="failing_content"]'
15
+ content_at '//div[@id="itext_content"]'
16
+ end
17
+
18
+ content_mapping do
19
+ url_pattern /^http:\/\/www\.twopatterns\.url/
20
+ content_at '//div[@id="failing_content"]'
21
+ content_at '//div[@id="itext_content"]'
22
+ end
23
+
24
+ content_mapping do
25
+ url_pattern /^http:\/\/www\.twopatterns\.url/
26
+ content_at '//div[@id="itext_second_content"]'
27
+ end
28
+
29
+ sanitize_tags ({:elements => ['p','br', 'b', 'em', 'i', 'strong', 'u', 'a', 'h1', 'h2', 'h3', 'li', 'ol', 'ul'], \
30
+ :attributes => { 'a' => ['href'] }})
31
+ end
32
+ end
33
+
34
+ context "for known sources with expected content scrapping" do
35
+ setup do
36
+ pretty_content = File.open("#{File.dirname(__FILE__)}/test_pages/pretty.html").read
37
+ stringio = StringIO.new(pretty_content)
38
+ Kernel.expects(:open).returns(stringio)
39
+ @entry_content = @scrapper.scrap_content('http://www.pretty.url/fsdsd')
40
+ end
41
+ should("identify the content") do
42
+ assert_match(%r{<p><strong>This is a strong text</strong></p>}, @entry_content)
43
+ end
44
+ end
45
+
46
+ context "for known pages with unexpected content scrapping" do
47
+ setup do
48
+ ugly_content = File.open("#{File.dirname(__FILE__)}/test_pages/ugly.html").read
49
+ stringio = StringIO.new(ugly_content)
50
+ Kernel.expects(:open).returns(stringio)
51
+ @entry_content = @scrapper.scrap_content('http://www.pretty.url/hsdae')
52
+ end
53
+ should("return nil") { assert_nil @entry_content }
54
+ end
55
+
56
+ context "for unknown pages" do
57
+ setup { @entry_content = @scrapper.scrap_content('http://www.unknown.url/hsdae') }
58
+ should("return nil") { assert_nil @entry_content }
59
+ end
60
+
61
+ context "multiple matching url patterns" do
62
+ setup do
63
+ twocontent = File.open("#{File.dirname(__FILE__)}/test_pages/twocontent.html").read
64
+ stringio = StringIO.new(twocontent)
65
+ Kernel.expects(:open).with('http://www.twopatterns.url').returns(stringio)
66
+ @entry_content = @scrapper.scrap_content('http://www.twopatterns.url')
67
+ end
68
+ should "match the first content" do
69
+ assert_equal 'The first one is matched', @entry_content
70
+ end
71
+ end
72
+
73
+ context "on scrapping with feedzirra" do
74
+ setup do
75
+ require 'content_scrapper/feedzirra'
76
+ require 'sax-machine'
77
+ require 'feedzirra/parser/rss_entry'
78
+ require 'feedzirra/parser/atom_entry'
79
+ end
80
+
81
+ context "feed entry with not parsable remote content, but with feed content set" do
82
+ setup do
83
+ @feed_entries = [ Feedzirra::Parser::RSSEntry.new, Feedzirra::Parser::AtomEntry.new ]
84
+ @feed_entries.each do |feed_entry|
85
+ feed_entry.url = 'http://www.unknown.url/wedhsf'
86
+ feed_entry.content = 'Pretty well written content is this.'
87
+ end
88
+ Kernel.expects(:open).with('http://www.unknown.url/wedhsf').never
89
+ end
90
+ should("return the original feed content") do
91
+ @feed_entries.each do |feed_entry|
92
+ assert_equal 'Pretty well written content is this.', feed_entry.scrap_content(@scrapper)
93
+ feed_entry.scrap_content!(@scrapper)
94
+ assert_equal 'Pretty well written content is this.', feed_entry.content
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
100
+
101
+ context "on setting default content scrapper" do
102
+ setup { @scrapper = ContentScrapper.create_new_default }
103
+ should "set the default to the recently created" do
104
+ assert_equal @scrapper, ContentScrapper.default
105
+ end
106
+ context "when changing default content scrapper" do
107
+ setup { @new_scrapper = ContentScrapper.new.set_as_default }
108
+ should "change the default to the new content scrapper" do
109
+ assert_equal @new_scrapper, ContentScrapper.default
110
+ end
111
+ end
112
+ context "for feed entry" do
113
+ setup do
114
+ @feed_entry = Feedzirra::Parser::RSSEntry.new
115
+ @feed_entry.url = 'http://www.unknown.url/gerhe'
116
+ @feed_entry.content = 'We should get this.'
117
+ end
118
+ should("scrap content by the default scrapper") do
119
+ assert_equal 'We should get this.', @feed_entry.scrap_content
120
+ end
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,17 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml">
2
+ <head>
3
+ <title>Pretty title</title>
4
+ </head>
5
+ <body>
6
+ <div class="articlec col">
7
+ <div id="itext_content">
8
+ <p><strong>This is a strong text</strong></p><br/>
9
+ SOMEWHERE. Things happened...
10
+ <p>paragraph1</p>
11
+ <p>paragraph2</p>
12
+ <h3>Section</h3>
13
+ <p>paragraph3</p>
14
+ <p>paragraph4</p>
15
+ <p>paragraph5</p>
16
+ </body>
17
+ </html>
@@ -0,0 +1,11 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml">
2
+ <head>
3
+ <title>Pretty title</title>
4
+ </head>
5
+ <body>
6
+ <div class="articlec col">
7
+ <div id="itext_second_content">The second pattern should not be matched</div>
8
+ <div id="itext_content">The first one is matched</div>
9
+ </div>
10
+ </body>
11
+ </html>
@@ -0,0 +1,399 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml">
3
+ <head>
4
+ <link rel="canonical" href="http://veda.sme.sk/c/5030948/zijeme-vo-veku-hlupakov.html" />
5
+ <meta http-equiv="Content-Type" content="text/html; charset=windows-1250" />
6
+ <meta name="description" content="Zd� sa, �e v prebiehaj�cej diskusii o kl�me chc� v�etci svetu iba dobre. Pre�o potom pad� to�ko ostr�ch slov?" />
7
+ <meta name="keywords" content="veda, technika, botanika, zool�gia, medic�na, psychol�gia, sociol�gia, dinosaury" />
8
+ <meta name="copyright" content="Petit Press, a.s." />
9
+ <meta name="author" content="SME - Petit Press, a.s." />
10
+ <meta name="revisit-after" content="1 days" />
11
+ <meta name="classification" content="Media" />
12
+ <meta name="distribution" content="Global" />
13
+ <meta name="rating" content="General" />
14
+ <meta name="doc-type" content="Web Page" />
15
+ <meta name="doc-class" content="Published" />
16
+ <meta name="doc-rights" content="Copywritten Work" />
17
+ <meta name="doc-publisher" content="Petit Press, a.s." />
18
+ <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
19
+ <link rel="alternate" type="application/rss+xml" title="RSS" href="http://rss.sme.sk/rss/rss.asp?sek=veda" />
20
+ <title>�ijeme vo veku hlup�kov? | Ekol�gia | veda.sme.sk</title>
21
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_layout.css?rev=19" />
22
+ <link rel="stylesheet" type="text/css" href="http://veda.sme.sk/css/sme_sub_layout.css?rev=24" />
23
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_header.css" />
24
+ <link rel="stylesheet" type="text/css" href="http://veda.sme.sk/css/sme_sub_header.css" />
25
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_article.css?rev=8" />
26
+ <link rel="stylesheet" href="http://www.sme.sk/css/sme_article2.css?rev=4" type="text/css" />
27
+ <link rel="stylesheet" href="http://www.sme.sk/css/sme_article_hist.css" type="text/css" />
28
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/main.js?rev=19"></script>
29
+ <script type="text/javascript" src="http://veda.sme.sk/storm_real/jscript/anketa.js?rev=18"></script>
30
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/x.js"></script>
31
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/my_x.js"></script>
32
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/storm_real/pager/pager.css" />
33
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_pager.css" />
34
+ <script type="text/javascript" src="http://www.sme.sk/storm_real/pager/pager_common.js"></script>
35
+ <script type="text/javascript" src="http://www.sme.sk/storm_real/pager/pager.js"></script>
36
+ </head>
37
+ <body onload="resizeLogo(); ">
38
+ <div class="top-ban"><div id='bmone2n-2337.47.2.64'>
39
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/2/64/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/2/64/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
40
+ </div>
41
+ </div>
42
+ <div id="logo">
43
+ <a href="http://www.sme.sk/"><img src="http://www.sme.sk/imgs/logo4.gif" width="139" height="39" alt="SME logo" /></a>
44
+ <!-- header -->
45
+ <div id="pager">
46
+ <div id="cita">��ta 23�684 �ud�</div>
47
+ <script type="text/javascript">
48
+ pager_generate(pager_commonPager, 0);
49
+ </script>
50
+ </div>
51
+ <!-- END header -->
52
+ </div>
53
+ <!-- header HS/PS -->
54
+ <div id="headerw">
55
+ <a href="http://veda.sme.sk"><img id="logoImg" src="http://veda.sme.sk/imgs/logo2/veda.gif" alt="logo" /></a>
56
+ </div>
57
+ <!-- END header -->
58
+ <div id="pagew">
59
+ <!-- navigacia & search -->
60
+ <div id="sroww">
61
+ <div id="sr_left"><a href="http://www.sme.sk">Sme.sk</a> | <a href="/">Veda</a> | <a href="/r/veda_eko/ekologia.html">Ekol�gia</a> | �ijeme vo veku hlup�kov?</div>
62
+ <div id="sr_searchw">
63
+ <div id="search">
64
+ <!-- SiteSearch Google -->
65
+ <form method="get" action="/gsearch/" target="_top">
66
+ <input type="hidden" name="domains" value="veda.sme.sk"></input>
67
+ <input id="sbi" name="q" type="text" value=" vyh�ad�vanie" onFocus="if(this.value==' vyh�ad�vanie') this.value='';" class="box" />
68
+ <label><input name="sitesearch" type="radio" value="veda.sme.sk" style="font-size: 5px" checked />v tejto sekcii</label>
69
+ <label><input name="sitesearch" type="radio" value="sme.sk" style="font-size: 5px" />v celom SME</label>
70
+ <input id="sbb" type="image" src="http://www.sme.sk/imgs/find_go.gif" width="20" height="17" align="absmiddle" />
71
+ <input type="hidden" name="client" value="pub-6456364300478111"></input>
72
+ <input type="hidden" name="forid" value="1"></input>
73
+ <input type="hidden" name="channel" value="5047212324"></input>
74
+ <input type="hidden" name="ie" value="windows-1250"></input>
75
+ <input type="hidden" name="oe" value="windows-1250"></input>
76
+ <input type="hidden" name="cof" value="GALT:#008000;GL:1;DIV:#D00000;VLC:663399;AH:center;BGC:FFFFFF;LBGC:D00000;ALC:D00000;LC:D00000;T:000000;GFNT:D00000;GIMP:D00000;FORID:11"></input>
77
+ <input type="hidden" name="hl" value="sk"></input>
78
+ </form>
79
+ <!-- SiteSearch Google -->
80
+ </div>
81
+ </div>
82
+ </div>
83
+ <!-- END navigacia -->
84
+ <!-- hlavny kontent -->
85
+ <div id="mainw" class="col2">
86
+ <!-- stredny + pravy stlpec -->
87
+ <div id="col2w" class="col2">
88
+ <!-- hlavny obsah stedneho stlpca -->
89
+ <div id="contentw">
90
+ <!-- storm-c-box -->
91
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/euroConvertor_data.js?rev=6-25"></script>
92
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/euroConvertor2.js?rev=10"></script>
93
+ <link rel="stylesheet" href="http://www.sme.sk/storm/css/euroConvertor2.css?rev=2" type="text/css" />
94
+
95
+ <!-- eTarget ContextAd Start -->
96
+ <div id="contenth" class="art">
97
+ <div class="options"><a href="/clanok_tlac.asp?cl=5030948"><img src="/storm/imgs/toolbar/tlac.gif" title="vytla�i�" border="0" /><span>VYTLA�I�</span></a> <a href="/diskusie/reaction_show.php?id_extern_theme=5030948&extern_type=sme-clanok"><img src="/storm/imgs/toolbar/koment.gif" title="diskutujte" border="0" /><span>diskutujte</span></a> <a href="#" onclick="send_err_info('art', 5030948);"><img src="/storm/imgs/toolbar/chyba.gif" title="Upozornite na chybu" border="0" /><span>UPOZORNITE NA CHYBU</span></a> <span class="sharespan">Po�lite:</span>
98
+ <a href="/clanok_tool.asp?t=odp&cl=5030948" class="sharelink"><img src="/storm/imgs/toolbar/email.gif" title="po�lite e-mailom" border="0" /><span>E-MAILOM</span></a> <a href="javascript:location.href='http://www.facebook.com/share.php?u='+encodeURIComponent(location.href);" onclick="return art_poslite_click('fb')" class="sharelink"><img src="/storm/imgs/toolbar/fb.gif" border="0" title="prida� na facebook" ><span>na facebook</span></a>
99
+ <a href="javascript:location.href='http://vybrali.sme.sk/sub.php?url='+encodeURIComponent(location.href);" class="sharelink" onclick="return art_poslite_click('vbsme');"><img src="/storm/imgs/toolbar/doasdf_c.gif" title="prida� na vybrali.sme.sk" border="0" /><span>VYBRALI.SME</span></a> | <a href="#" class="sharelink" onmouseover="sharelinkOver()" onmouseout="sharelinkOut()"><span>�al�ie</span><img src="/storm/imgs/toolbar/share_arrow.gif" alt="+" border="0"></a>
100
+ <div class="share_more" id="share_more" onmouseover="sharemoreOver();" onmouseout="sharemoreOut();">
101
+ <ul>
102
+ <li><a href="http://delicious.com/save" onclick="return art_poslite_click('del')" class="sharelink"><img src="/storm/imgs/toolbar/delicious.gif" title="prida� na delicious" border="0"><span>delicious.com</span></a></li>
103
+ <li><a href="http://www.google.com/bookmarks" onclick="return art_poslite_click('g')" class="sharelink"><img src="/storm/imgs/toolbar/google.gif" title="prida� na google" border="0"><span>Google Bookmarks</span></a></li>
104
+ <li><a href="http://www.myspace.com/Modules/PostTo/Pages/" onclick="return art_poslite_click('myspc')" class="sharelink"><img src="/storm/imgs/toolbar/myspc.gif" title="prida� na myspace" border="0"><span>MySpace</span></a></li>
105
+ <li><a href="http://www.twitter.com/home?status=" onclick="return art_poslite_click('twit')" class="sharelink"><img src="/storm/imgs/toolbar/twit.gif" title="prida� na twitter" border="0"><span>Twitter</span></a></li>
106
+ </ul>
107
+ </div>
108
+ </div>
109
+ <h1>�ijeme vo veku hlup�kov?</h1>
110
+ </div>
111
+ <div class="articlec col">
112
+ <div id="CORRUPTED_HERE_itext_content">
113
+ <p><strong>Zd� sa, �e v prebiehaj�cej diskusii o kl�me chc� v�etci svetu iba dobre. Pre�o potom pad� to�ko ostr�ch slov?</strong><br /><br />BRATISLAVA. V pondelok mal v bratislavskom A4 - Nultom priestore premi�ru film Franny Armstrongovej The Age of Stupid (Vek hlup�kov), odohr�vaj�ci sa v roku 2055 po glob�lnom jadrovom konflikte, vyvolanom zmenou kl�my.</p>
114
+ <p>Star� mu�, ktor� �ije v zni�enom svete, si pozer� z�bery z filmov�ho arch�vu z na�ich �ias a p�ta sa: Pre�o sme nezastavili klimatick� zmenu, ke� bol e�te �as?</p>
115
+ <p>Odpove� znie, �e �udstvo sa odjak�iva spr�valo hl�po, d�kazom �oho s� pretrv�vaj�ce vojny o majetky a neschopnos� pozrie� sa dopredu. Ani na za�iatku 21. storo�ia zo svojich zvyklost� nevybo�ilo.<br />�o in� teda mohli hlup�ci �aka�, ke� si tak systematicky p�lili pod sebou kon�r?</p>
116
+ <h3>Dva scen�re</h3>
117
+ <p>V�etko sa zdalo tak� jednoduch�. Ak je pravda, �e kl�mu men�� �lovek, sta�ilo obmedzi� spa�ovanie uhlia a ropy, pr�padne sa nau�i� odb�rava� oxid uhli�it� � a <a href="http://www.sme.sk/storm/itextg.asp?idh=4791644&ids=6" class="itext_link">glob�lne otep�ovanie</a> by sa (�asom) zastavilo. Hlup�ci to v�ak neboli schopn� urobi�.</p>
118
+ <p style="text-align: center;"><img src="http://i.sme.sk/vydania/20090924/photo2/sm-0924-026c-kniha.rw.jpg" /></p>
119
+ <p style="text-align: center;"><span style="font-size: x-small;">Lord Lawson nap�sal knihu o glob�lnom otep�ovan� �triezvo a bez em�ci�, hoci s�m sa em�ci�m v�dy nevyhol.<br />P�e, �e veda o kl�me je iba jedn�m aspektom glob�lneho otep�ovania. D�le�it� je rozozna�, ak� opatrenia<br />s� politicky re�lne. Urobia� politici spr�vne rozhodnutie? </span></p>
120
+ <p>Teraz predlo�me in� scen�r, v ktorom tie� hraj� hlavn� �lohu nerozumn� �udia, tentoraz z knihy britsk�ho ekon�ma Nigela Lawsona Vr�me sa k rozumu (v �eskom preklade Petra Hol��ka, Doko��n, <a href="http://www.sme.sk/storm/itextg.asp?idh=4791640&ids=7" class="itext_link">Praha</a> 2009).<br />Lord Lawson of Blaby, minister vo vl�de Margaret Thatcherovej kon�tatuje, �e svet netreba zachra�ova� pred glob�lnym otep�ovan�m, ale pred t�mi, ktor� preh��aj� jeho nebezpe�enstvo.</p>
121
+ <p>�Nov� n�bo�enstvo glob�lneho otep�ovania... obsahuje zrnko pravdy a obrovsk� kopu nezmyslov... Zd� sa, �e sme vst�pili do nov�ho veku nerozumu, �o je hlboko znepokojuj�ce.�</p>
122
+ <p>Lawsonova poz�cia je zalo�en� na ve�mi optimistickom presved�en�, �e s��asn� klimatick� po��ta�ov� modely nemo�no prece�ova�, lebo pracuj� s pr�li� zlo�it�m a� chaotick�m syst�mom.</p>
123
+ <p>Nem�me teda istotu, �e glob�lne teploty sa bud� �alej zvy�ova�.</p>
124
+ <h3>Na�o pom�ha� bohat�m?</h3>
125
+ <p>Lawson v�ak prip���a, �e otep�ovanie m��e pokra�ova�. V tom pr�pade treba zv�i� n�klady, ktor� prinesie zni�ovanie emisi�, a pr�nosy, ktor� z toho pre na�ich potomkov vyplyn�.</p>
126
+ <p>Ke�e Lawsonovi je jasn�, �e v glob�le svet speje k v��iemu bohatstvu, je nerozumn� zajtra vynalo�i� obrovsk� �iastky na nov� technol�gie iba preto, aby bohat� �udia koncom tohto storo�ia boli iba o trochu chudobnej�� (aj tak im hroz� nieko�kon�sobne v��ie bohatstvo ako m�me k dispoz�cii my dnes).</p>
127
+ <p>�al��m argumentom je postoj� ��ny a Indie. Obe krajiny maj� obrovsk� z�soby uhlia a obe chc� pre svojich �ud� vy��iu �ivotn� �rove�, ak� si po�as priemyselnej revol�cie vybudovali s vyu�it�m fos�lnych pal�v Eur�pa a Severn� Amerika.</p>
128
+ <p>Taktie� nie je jednoduch� rozhodn��, ktor� z mo�n�ch glob�lnych hrozieb (terorizmus,� celosvetov� pand�mia, dopad asteroidu) je v porovnan� s glob�lnym otep�ovan�m naliehavej�ia; spom�na knihu s��asn�ho predsedu britskej Kr�ovskej spolo�nosti Martina Reesa Na�a posledn� hodina, z ktorej rozhodne nevypl�va, �e by svet mal prednostne rie�i� pr�ve glob�lne otep�ovanie.</p>
129
+ <p>V re�lnom svete m�me kone�n� zdroje a mus�me ur�ova� priority, tomuto sa unikn�� ned�, p�e Lawson.<br />�V�znam by mohla ma� iba z�sadn� zmena n�ho sp�sobu �ivota, ak� by priniesol podstatn� n�rast cien energie; ten je nevyhnutn� nielen preto, aby sme jej spotreb�vali menej, ale aj preto, aby sa bezuhl�kov� energia stala konkurencieschopnou.�</p>
130
+ <p>�</p>
131
+ <div class="clanok-plus">
132
+ <div class="clanok-plus-nadpis">�erven� �iara nad kl�mou?</div>
133
+ <p><strong>Film Vek hlup�kov stoj� za to, aby sme si ho pozreli. Za pre��tanie stoj� aj kniha lorda Lawsona.</strong><br /><br />Jeden z div�kov k filmu The Age of Stupid povedal: �Vedci, filmov� producenti a nakoniec aj �ir�ia verejnos� si za��naj� pomaly uvedomova� osudov� prepojenos� a zlo�itos� s��asn�ho sveta. Film ukazuje, �e ka�dodenn� �ivot �ud� m��e ma� ve�mi v�znamn� dopady na �ivoty �ud� na opa�nej strane sveta. Hoci sa zameriava najm� na kl�mu, jeho posolstvo je hlb�ie; sna�� sa nazna�i�, �e s na��m s��asn�m poh�adom na pr�rodu a svet nie je v�etko v poriadku.�</p>
134
+ <p>Kniha lorda Lawsona Vr�me sa k rozumu je zas pozoruhodn� najm� t�m, �e analyzuje glob�lne otep�ovanie z praktickej str�nky. Zrejme si naozaj m�lokto uvedomuje, �o v�etko bude treba obetova�, aby sme dok�zali obmedzi� emisie z ropy a uhlia nato�ko, aby sa prestal zvy�ova� obsah oxidu uhli�it�ho v ovzdu��.</p>
135
+ <p>Autor s�ubuje poh�ad bez em�ci�, preto je �koda, �e na z�ver stoto��uje bojovn�kov proti oxidu uhli�it�mu s marxistami, ktor� ke� zistili, �e sa komunizmus skon�il, sa r�chlo prefarbili na zeleno.</p>
136
+ <p>Je to nef�r �vaha, najm� ke� oxidom uhli�it�m kedysi argumentovala aj Lawsonova ��fka, ��elezn� lady� Margaret Thatcherov� vo svojom spravodlivom boji proti rebeluj�cim ban�kom.</p>
137
+ <p>�Alarmisti� v�ak m��u kontrova�: ak rozv�ny Lawson navrhuje v�bec nejak� opatrenia, potom najm� tak�, ak�ch bol schopn� u� nevzdelan� rusk� mu�ik v �asoch Petra I. (hovor� sa im adapt�cia): ke� bude zima, d�m si ko�uch. Ke� bude teplo, tak ho vyzle�iem. Nu� a ak bude ve�a vody, navle�iem si vy��ie gum�ky.</p>
138
+ <div class="clanok-plus-autor">(a�)</div>
139
+ </div>
140
+ <div class="clanok-plus">
141
+ <div class="clanok-plus-nadpis">Energia a Parkinsonove z�kony</div>
142
+ <p><strong>N�jdu sa spolo�n� stanovisk� pri spolo�nej snahe uplat�ova� n�zkouhl�kov� energetick� zdroje?</strong><br /><br />BRATISLAVA. Parkinsonov z�kon rovnosti expertov hovor�, �e na ka�d�ho experta pripad� in� expert s opa�n�m n�zorom.</p>
143
+ <p>Uk�zalo sa to aj v diskusii po filme The Age of Stupid v bratislavskom V-klube, ktor� vtipne moderoval Juraj Rizman zo slovenskej pobo�ky Greenpeace.</p>
144
+ <p style="text-align: center;"><img src="http://i.sme.sk/vydania/20090924/photo2/sm-0924-026c-ropnaveza.rw.jpg" /></p>
145
+ <p style="text-align: center;"><span style="font-size: x-small;">Symbol glob�lnej civiliz�cie � ropn� vrtn� ve�a. ILUSTRA�N� FOTO � TASR/AP</span></p>
146
+ <p>Traja z�stancovia nevyhnutnosti obmedzi� obsah uhl�ka vo vzduchu sa v nej nemohli zhodn��, ako to re�lne dosiahnu�: vyu��va� viac vetern� a slne�n� energiu a energiu biomasy alebo aj nepopul�rnu jadrov�?<br />Je jasn�, �e ve�mi v�tan�mi zdrojmi s� aj vietor (pozrite sa do Rak�ska, do Ma�arska �i do Ve�kej Brit�nie), aj Slnko (pozrite sa do Nemecka). Zatia� v�ak vo svete aj u n�s tvoria iba okrajov� doplnok energie, z�skavanej z klasick�ch zdrojov.</p>
147
+ <p>No a ak sa pozriete treb�rs do Franc�zska, zist�te, �e zabezpe�uje v��inu elektrickej energie z jadrov�ch reaktorov.</p>
148
+ <p>Zrejme v�aka v�asn�mu n�stupu hne� po druhej svetovej vojne, ke� e�te jadrov� reaktory bu� neboli tak� drah�, alebo tak� hroziv�.</p>
149
+ <p>�o teda potrebujeme, ak berieme klimatick� zmenu v�ne? Zrejme v�etky tieto zdroje. Ide�lne by bolo, keby raz nastal s�hlas expertov aspo� v tomto.</p>
150
+ <p>Preto�e ak sa nevedia dohodn�� traja �udia z jednej krajiny, ktor�m ide o spolo�n� vec, ako sa maj� dohodn�� tis�cky expertov zo stoviek kraj�n, z�ujmy ktor�ch s� ve�mi r�znorod�?</p>
151
+ <p>Ke�e z�stupcovia vl�dnych in�tit�ci� pozvanie na diskusiu vo �v��ku� ignorovali, vraj ako v�dy, nedozvedeli sme sa, �o maj� v skuto�nosti v pl�ne oni, ak v�bec nie�o.</p>
152
+ <div class="clanok-plus-autor">(a�)</div>
153
+ </div>
154
+ </div>
155
+ <!-- eTarget ContextAd End -->
156
+
157
+ <p class="autor_line"><b>�tvrtok 24. 9. 2009</b> | <a href="http://www.sme.sk/autor_info.asp?id=58">Michal A�</a><br /><span class="copyr_sme">�l�nok bol uverejnen� v tla�enom vydan� SME. (<a href="http://predplatne.sme.sk">Predpla�te si SME cez internet.</a>)</span><br /><span class="copyr"><a href="#" onClick="st_openWindow('/footer/', 'PetitPress','width=650,height=550'); return false;">&copy 2009 Petit Press. Autorsk� pr�va s� vyhraden� a vykon�va ich vydavate�. Spravodajsk� licencia vyhraden�.</a></span></p>
158
+ <div class="cb"></div>
159
+ <div class="cb"></div>
160
+ <div class="article-etarget">
161
+ <iframe id="etarget_iframe" src="/storm_real/extra/etarget/etarget_load2.asp?q=5030948&idsek=veda&idrub=veda_eko&nazrub=Ekol%F3gia&l=3&nobr=1&st_odb=sme5&st_cssr=1" border="0" width="100%" height="1" hspace="0" vspace="0" SCROLLING="no" BORDERCOLOR="#FFFFFF" MARGINWIDTH="0" MARGINHEIGHT="0" FRAMEBORDER="0"></iframe>
162
+ </div>
163
+ <div>
164
+ <script type="text/javascript" src="http://diskusie.sme.sk/diskusie/extern_action.php?action=get&id_extern_theme=5030948&extern_type=sme-clanok&limit=5&domain=veda.sme.sk"></script>
165
+ </div>
166
+ <div class="cb"></div>
167
+ </div>
168
+ <script type="text/javascript">
169
+ storm_pg_stat_hit('art', 5030948, '', 1);
170
+ </script>
171
+ <!-- koniec hlavneho obsahu clanku -->
172
+ <!-- FLOATBOX pravy - tu sa rusi nutnost davat cely "floatboxw" do <p></p> -->
173
+ <div id="floatboxw">
174
+ <div class="floatbox extra">
175
+ <p><img src="http://i.sme.sk/cdata/8/50/5030948/otva-r773.jpg" alt="Smog je probl�mom prakticky v�etk�ch kraj�n. Tento z�ber je zo severn�ch �iech." /></p>
176
+ <p>Smog je probl�mom prakticky v�etk�ch kraj�n. Tento z�ber je zo severn�ch �iech.</p>
177
+ <p>Foto: ILUSTRA�N� � �TK</p>
178
+ </div>
179
+ <div class="floatbox"><div id='bmone2n-2337.47.6.20'>
180
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/6/20/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/6/20/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
181
+ </div>
182
+ </div>
183
+ <!-- zaciatok TABy -->
184
+ <!-- storm-art-najcit-box -->
185
+ <div class="floatbox tabbed">
186
+ <div><img src="/imgs/box/najcit_cl.gif" style="top: 0px; right: 0px; border: 0px" alt="naj��tanej�ie" /></div>
187
+ <div class="c3blok">
188
+ <div class="tabs">
189
+ <ul><li class="ac" id="tab1_1hod_button" onclick='tabButtonClicked("tab1", "1hod")'>4 hodiny</li><li class="ia" id="tab1_2hod_button" onclick='tabButtonClicked("tab1", "2hod")'>24h</li><li class="ia" id="tab1_3hod_button" onclick='tabButtonClicked("tab1", "3hod")'>3dni</li><li class="ia" id="tab1_4hod_button" onclick='tabButtonClicked("tab1", "4hod")'>7dn�</li></ul>
190
+ </div>
191
+ <div class="tabsc" id="tab1_1hod_content">
192
+ <ol>
193
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 926</li>
194
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 514</li>
195
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 187</li>
196
+ <li><a href="/c/5032517/nasli-najvacsi-anglosasky-poklad.html">Na�li najv��� anglosask� poklad</a> 142</li>
197
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 85</li>
198
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 80</li>
199
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 48</li>
200
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 47</li>
201
+ <li><a href="/c/5028949/ulovili-sestmetroveho-morskeho-kraka-len-druhy-raz.html">Ulovili �es�metrov�ho morsk�ho kraka. Len druh� raz</a> 46</li>
202
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 41</li>
203
+ </ol>
204
+ </div>
205
+ <div class="tabsc" style="display: none" id="tab1_2hod_content">
206
+ <ol>
207
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�487</li>
208
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
209
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
210
+ <li><a href="/c/5032517/nasli-najvacsi-anglosasky-poklad.html">Na�li najv��� anglosask� poklad</a> 3�275</li>
211
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 2�806</li>
212
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 2�199</li>
213
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 2�126</li>
214
+ <li><a href="/c/5028949/ulovili-sestmetroveho-morskeho-kraka-len-druhy-raz.html">Ulovili �es�metrov�ho morsk�ho kraka. Len druh� raz</a> 1�929</li>
215
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 1�825</li>
216
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 1�147</li>
217
+ </ol>
218
+ </div>
219
+ <div class="tabsc" style="display: none" id="tab1_3hod_content">
220
+ <ol>
221
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�980</li>
222
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5027440/vtak-ktory-jedol-ludi-jestvoval.html">Vt�k, ktor� jedol �ud�, jestvoval</a> 14�645</li>
223
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 13�155</li>
224
+ <li><a href="/c/5028827/objavili-syrsky-kralovsky-poklad.html">Objavili s�rsky kr�ovsk� poklad</a> 11�819</li>
225
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
226
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
227
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 10�797</li>
228
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 9�313</li>
229
+ <li><a href="/c/5029357/objavili-novu-cast-velkeho-cinskeho-muru.html">Objavili nov� �as� Ve�k�ho ��nskeho m�ru</a> 6�772</li>
230
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 6�185</li>
231
+ </ol>
232
+ </div>
233
+ <div class="tabsc" style="display: none" id="tab1_4hod_content">
234
+ <ol>
235
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5027440/vtak-ktory-jedol-ludi-jestvoval.html">Vt�k, ktor� jedol �ud�, jestvoval</a> 26�503</li>
236
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�980</li>
237
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 13�155</li>
238
+ <li><a href="/c/5028827/objavili-syrsky-kralovsky-poklad.html">Objavili s�rsky kr�ovsk� poklad</a> 11�819</li>
239
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
240
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
241
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 10�797</li>
242
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 9�313</li>
243
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 7�605</li>
244
+ <li><a href="/c/5029357/objavili-novu-cast-velkeho-cinskeho-muru.html">Objavili nov� �as� Ve�k�ho ��nskeho m�ru</a> 6�772</li>
245
+ </ol>
246
+ </div>
247
+ </div>
248
+ <!--###stp1###-->
249
+ </div>
250
+ <!-- storm-art-najcit-box-end -->
251
+ <!-- end TABy -->
252
+ <div class="floatbox tabbed"><div style="margin: 2px 2px 2px 8px;"><iframe id="netsuccess_stip_iframe" src="/storm_real/extra/nss_tip/show.asp?id=1-10" border="0" width="300" height="1" hspace="0" vspace="0" SCROLLING="no" BORDERCOLOR="#FFFFFF" MARGINWIDTH="0" MARGINHEIGHT="0" FRAMEBORDER="0"></iframe></div></div>
253
+ </div>
254
+ <!-- koniec floatboxw -->
255
+ <div class="cb"></div>
256
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/clanok.js?rev=10"></script>
257
+
258
+ <script type="text/javascript">
259
+ addLoadEvent(function(){searchSk();});
260
+ </script>
261
+ <!-- storm-c-box-end -->
262
+ </div>
263
+ <!-- END hlavny obsah stedneho dvojstlpca -->
264
+ </div>
265
+ <!-- END stredny stlpec -->
266
+ <!-- lavy stlpec -->
267
+ <div id="col1w">
268
+ <a href="http://www.sme.sk"><img src="/imgs/title_back2.gif" width="125" height="31" style="border: 0px" alt="sp� na SME.sk" /></a><div id="mainmenu"><ul>
269
+ <li class="menu_section_home"><a href="/" id="st_bm_cat_ext_324">Veda</a></li>
270
+ <li><a href="/r/veda_matema/matematika.html" id="st_bm_cat_veda_matema">Matematika</a></li>
271
+ <li><a href="/r/veda_aktual/aktuality.html" id="st_bm_cat_veda_aktual">Aktuality</a></li>
272
+ <li><a href="/r/veda_vesmir/vesmir.html" id="st_bm_cat_veda_vesmir">Vesm�r</a></li>
273
+ <li><a href="/r/veda_fyzika/fyzika.html" id="st_bm_cat_veda_fyzika">Fyzika</a></li>
274
+ <li><a href="/r/veda_botanik/botanika.html" id="st_bm_cat_veda_botanik">Botanika</a></li>
275
+ <li><a href="/r/veda_dinosau/dinosaury.html" id="st_bm_cat_veda_dinosau">Dinosaury</a></li>
276
+ <li><a href="/r/veda_eko/ekologia.html" id="st_bm_cat_veda_eko">Ekol�gia</a></li>
277
+ <li><a href="/r/veda_histor/historia.html" id="st_bm_cat_veda_histor">Hist�ria</a></li>
278
+ <li><a href="/r/veda_psych/psychologia.html" id="st_bm_cat_veda_psych">Psychol�gia</a></li>
279
+ <li><a href="/r/veda_medic/medicina.html" id="st_bm_cat_veda_medic">Medic�na</a></li>
280
+ <li><a href="/r/veda_archeo/archeologia.html" id="st_bm_cat_veda_archeo">Archeol�gia</a></li>
281
+ <li><a href="/r/veda_zoo/zoologia.html" id="st_bm_cat_veda_zoo">Zool�gia</a></li>
282
+ <li><a href="/r/veda_geo/geologia.html" id="st_bm_cat_veda_geo">Geol�gia</a></li>
283
+ <li><a href="/r/veda_techno/technologie.html" id="st_bm_cat_veda_techno">Technol�gie</a></li>
284
+ <li><a href="/r/veda_profil/profily.html" id="st_bm_cat_veda_profil">Profily</a></li>
285
+ <li><a href="/r/veda_pfoto/press-foto.html" id="st_bm_cat_veda_pfoto">Press foto</a></li>
286
+ <li><a href="/r/veda_recenz/recenzie.html" id="st_bm_cat_veda_recenz">Recenzie</a></li>
287
+ <li style="background: url(/imgs/archiv-menu.gif) no-repeat 45px -2px; overflow: visible; height: 27px; padding-top: 3px;"><a href="http://dennik.sme.sk">DENN�K</a></li>
288
+ </ul>
289
+ </div>
290
+ <div style="text-align: center;"><div id='bmone2n-2337.47.4.64'>
291
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/4/64/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/4/64/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
292
+ </div>
293
+ </div><div id="menu-reklama-1"></div>
294
+ <div id="prilmenu">
295
+ <h5>ANKETA</h5>
296
+ <div id="stbox_ank_veda_hs_el"></div>
297
+ <script type="text/javascript">
298
+ function stbox_ank_veda_hs_set(){}
299
+ var stbox_ank_veda_hs = new stbox_ank_veda_hs_set();
300
+ addLoadEvent (function () {
301
+ storm_anketa_init(stbox_ank_veda_hs);
302
+ stbox_ank_veda_hs.ank_idg = 'veda_hs';
303
+ stbox_ank_veda_hs.ank_id = 0;
304
+ stbox_ank_veda_hs.f_show_num = 1;
305
+ stbox_ank_veda_hs.ank_max_width = 75;
306
+ stbox_ank_veda_hs.id_el = 'stbox_ank_veda_hs_el';
307
+ stbox_ank_veda_hs.obj_variable = 'stbox_ank_veda_hs';
308
+ storm_anketa_render(stbox_ank_veda_hs);
309
+ });
310
+ </script>
311
+ <div class="note"><a href="/anketa_archiv.asp?pol=veda_hs">star�ie ankety</a></div>
312
+ </div>
313
+ <div id="c1blokw">
314
+ <div class="c1blok">
315
+ <h5>PR�LOHY</h5>
316
+ <ul>
317
+ <li><a href="http://pocitace.sme.sk">Po��ta�e</a></li>
318
+ <li><a href="http://mobil.sme.sk">Mobil</a></li>
319
+ <li><a href="http://www.sme.sk/zdravie/">Zdravie</a></li>
320
+ <li><a href="http://auto.sme.sk">Auto - Moto</a></li>
321
+ <li><a href="http://bratislava.sme.sk">Bratislava</a></li>
322
+ <li><a href="http://www.sme.sk/forum/">F�rum</a></li>
323
+ <li><a href="http://www.sme.sk/kariera/">Kari�ra</a></li>
324
+ <li><a href="http://cestovanie.sme.sk">Na cest�ch</a></li>
325
+ <li><a href="http://www.sme.sk/vikend/">V�kend</a></li>
326
+ <li><a href="http://zaujimavosti.sme.sk">Zauj�mavosti</a></li>
327
+ </ul>
328
+ </div>
329
+ <div class="c1blok">
330
+ <h5>PETIT PRESS</h5>
331
+ <ul>
332
+ <li><a href="http://www.sme.sk/dok/tiraz/">Tir�</a></li>
333
+ <li><a href="http://inzercia.sme.sk/">Inzercia</a></li>
334
+ <li><a href="http://predplatne.sme.sk">Predplatn�</a></li>
335
+ <li><a href="http://www.sme.sk/kodex/">Etick� k�dex</a></li>
336
+ </ul>
337
+ </div>
338
+ </div>
339
+ </div>
340
+ </div>
341
+ <!-- END hlavny kontent -->
342
+ <!-- footer -->
343
+ <div id="footer">
344
+ <p><a href="http://www.sme.sk/mapa-stranky/">Mapa str�nky</a> | <a href="http://www.sme.sk/dok/kontakt/">kontakt</a> | <a href="http://www.sme.sk/dok/faq/">pomoc</a> | <a href="#null" onClick="st_openWindow('http://www.sme.sk/footer/homeNS.htm', 'Ako homepage', 'width=500,height=600');"><!--[if IE 5]><a href="#null" onClick="this.style.behavior='url(#default#homepage)'; this.setHomePage('http://veda.sme.sk');"><![endif]-->nastavi� ako homepage <!--[if IE 5]></a><![endif]--></a> | <a href="#" onClick="bookmark_site('www.sme.sk', 'www.sme.sk');">prida� medzi ob��ben�</a> | <a href="#null" onClick="st_openWindow('http://www.sme.sk/footer/win.htm', 'PetitPress','width=500,height=300');">&copy; Petit Press, a.s.</a></p>
345
+ </div>
346
+ <!-- END footer -->
347
+ </div>
348
+
349
+ <div id="helpbox" style="width:200px;text-align:left;visibility:hidden;background-color:#FFFFAA;border:1px solid #940400;font:12px Arial, sans-serif;padding:3px;position:absolute;color:black;"></div>
350
+ <script type="text/javascript">
351
+ <!--
352
+ main_onload();
353
+ // -->
354
+ </script>
355
+
356
+
357
+ <script type="text/javascript">
358
+ st_active_cat = 'veda_eko';
359
+ show_active_menu_categ();
360
+ </script>
361
+ <!-- BBmedia one2many code -->
362
+ <!-- HowTo: All the ad codes must be above this code! -->
363
+ <!-- HowTo: It is good idea to place this code just below the last ad code. -->
364
+ <script type='text/javascript' charset='windows-1250' src='http://ad2.bbmedia.cz/bb/bb_one2n.js?56020090925101'></script>
365
+ <script type='text/javascript'>/* <![CDATA[ */
366
+ bmone2n.addPosition('2337.47.6.20','');
367
+ bmone2n.addPosition('2337.47.2.64','');
368
+ bmone2n.addPosition('2337.47.4.64','');
369
+ bmone2n.getAd('ad2.bbmedia.cz','',''+(typeof(bburlparam)=='string'?'&'+bburlparam:''));
370
+ /* ]]> */</script>
371
+ <div id='bmone2t-2337.47.6.20' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.6.20'); /* ]]> */</script></div>
372
+ <div id='bmone2t-2337.47.2.64' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.2.64'); /* ]]> */</script></div>
373
+ <div id='bmone2t-2337.47.4.64' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.4.64'); /* ]]> */</script></div>
374
+ <script type='text/javascript'>/* <![CDATA[ */ bmone2n.moveAd(); /* ]]> */</script>
375
+ <!-- Gemius SA - gemiusAudience / sme.sk -->
376
+ <script type="text/javascript">
377
+ var pp_gemius_identifier = new String('AfUw5KdgJ28KVrrUYjtEp7e9Dow8ZYLaUcGoNn881W7.J7');
378
+ </script>
379
+ <script type="text/javascript" src="http://services.sme.sk/meranie/genius/xgenius.js"></script>
380
+ <script type="text/javascript">
381
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
382
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
383
+ </script>
384
+ <script type="text/javascript">
385
+ var pageTracker = _gat._getTracker("UA-66869-23"); pageTracker._initData(); pageTracker._trackPageview();
386
+ </script>
387
+ <!-- StartK�d: go.cz.bbelements.com 3.1 Dynamick� + p�edchoz� plugin-detekce test(581) / UnikatiTEST(19) / POST(1) / nestandard(27) -->
388
+ <script type='text/javascript' charset='windows-1250'>
389
+ /* <![CDATA[ */
390
+ var bbs=screen,bbn=navigator,bbh;bbh='&ubl='+bbn.browserLanguage+'&ucc='+bbn.cpuClass+'&ucd='+bbs.colorDepth+'&uce='+bbn.cookieEnabled+'&udx='+bbs.deviceXDPI+'&udy='+bbs.deviceYDPI+'&usl='+bbn.systemLanguage+'&uje='+bbn.javaEnabled()+'&uah='+bbs.availHeight+'&uaw='+bbs.availWidth+'&ubd='+bbs.bufferDepth+'&uhe='+bbs.height+'&ulx='+bbs.logicalXDPI+'&uly='+bbs.logicalYDPI+'&use='+bbs.fontSmoothingEnabled+'&uto='+(new Date()).getTimezoneOffset()+'&uti='+(new Date()).getTime()+'&uui='+bbs.updateInterval+'&uul='+bbn.userLanguage+'&uwi='+bbs.width;
391
+ if(typeof(bburlparam)=='string') { bbh+='&'+bburlparam; }
392
+ if(typeof(bbkeywords)=='string') { bbh+='&keywords='+escape(bbkeywords); }
393
+ document.write("<scr"+"ipt charset='windows-1250' type='text/javascript' src='http://go.cz.bbelements.com/please/showit/581/19/1/27/?typkodu=non-standard"+bbh+"&alttext=0&border=0&bgcolor=FFFFFF&text=000000&link=0000FF&target=_blank&bust="+Math.random()+"'>");
394
+ document.write("<"+"\/scr"+"ipt>");
395
+ /* ]]> */
396
+ </script>
397
+ <!-- KonecK�d: go.cz.bbelements.com 3.1 Dynamick� + p�edchoz� plugin-detekce -->
398
+ </body>
399
+ </html>
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: content_scrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Gyorgy Frivolt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-13 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.10.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mocha
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.8
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: sanitize
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.2.0
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: nokogiri
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.4.1
54
+ version:
55
+ description: If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.
56
+ email: gyorgy.frivolt@gmail.com
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files:
62
+ - LICENSE
63
+ - README.rdoc
64
+ files:
65
+ - .document
66
+ - .gitignore
67
+ - LICENSE
68
+ - README.rdoc
69
+ - Rakefile
70
+ - VERSION
71
+ - config/content_scrapper.rb
72
+ - content_scrapper.gemspec
73
+ - lib/content_scrapper.rb
74
+ - lib/content_scrapper/content_mapping.rb
75
+ - lib/content_scrapper/feedzirra.rb
76
+ - rails/init.rb
77
+ - test/helper.rb
78
+ - test/test_content_mapping.rb
79
+ - test/test_content_scrapper.rb
80
+ - test/test_pages/pretty.html
81
+ - test/test_pages/twocontent.html
82
+ - test/test_pages/ugly.html
83
+ has_rdoc: true
84
+ homepage: http://github.com/fifigyuri/content_scrapper
85
+ licenses: []
86
+
87
+ post_install_message:
88
+ rdoc_options:
89
+ - --charset=UTF-8
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: "0"
97
+ version:
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: "0"
103
+ version:
104
+ requirements: []
105
+
106
+ rubyforge_project:
107
+ rubygems_version: 1.3.5
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: Gem for those who want to screen scrap only the content part of web pages, blogs or articles.
111
+ test_files:
112
+ - test/test_content_mapping.rb
113
+ - test/test_content_scrapper.rb
114
+ - test/helper.rb