content_scrapper 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Gyorgy Frivolt
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ = content_scrapper
2
+
3
+ Gem for those who want to screen scrape only the content part of web pages, blogs or articles.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Gyorgy Frivolt. See LICENSE for details.
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "content_scrapper"
8
+ gem.summary = "Gem for those who want to screen scrap only the content part of web pages, blogs or articles."
9
+ gem.description = "If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization."
10
+ gem.email = "gyorgy.frivolt@gmail.com"
11
+ gem.homepage = "http://github.com/fifigyuri/content_scrapper"
12
+ gem.authors = ["Gyorgy Frivolt"]
13
+ gem.add_development_dependency 'thoughtbot-shoulda', '>=2.10.2'
14
+ gem.add_development_dependency 'mocha', '>=0.9.8'
15
+
16
+ gem.add_dependency 'sanitize', '>=1.2.0'
17
+ gem.add_dependency 'nokogiri', '>=1.4.1'
18
+ end
19
+ Jeweler::GemcutterTasks.new
20
+ rescue LoadError
21
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
22
+ end
23
+
24
+ require 'rake/testtask'
25
+ Rake::TestTask.new(:test) do |test|
26
+ test.libs << 'lib' << 'test'
27
+ test.pattern = 'test/**/test_*.rb'
28
+ test.verbose = true
29
+ end
30
+
31
+ begin
32
+ require 'rcov/rcovtask'
33
+ Rcov::RcovTask.new do |test|
34
+ test.libs << 'test'
35
+ test.pattern = 'test/**/test_*.rb'
36
+ test.verbose = true
37
+ end
38
+ rescue LoadError
39
+ task :rcov do
40
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
41
+ end
42
+ end
43
+
44
+ task :test => :check_dependencies
45
+
46
+ task :default => :test
47
+
48
+ require 'rake/rdoctask'
49
+ Rake::RDocTask.new do |rdoc|
50
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
51
+
52
+ rdoc.rdoc_dir = 'rdoc'
53
+ rdoc.title = "content_scrapper #{version}"
54
+ rdoc.rdoc_files.include('README*')
55
+ rdoc.rdoc_files.include('lib/**/*.rb')
56
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
@@ -0,0 +1,3 @@
1
+
2
+ sanitize_tags ({:elements => ['p','br', 'b', 'em', 'i', 'strong', 'u', 'a', 'h1', 'h2', 'h3', 'li', 'ol', 'ul'], \
3
+ :attributes => { 'a' => ['href'] }})
@@ -0,0 +1,72 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{content_scrapper}
8
+ s.version = "0.0.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Gyorgy Frivolt"]
12
+ s.date = %q{2010-02-13}
13
+ s.description = %q{If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.}
14
+ s.email = %q{gyorgy.frivolt@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "config/content_scrapper.rb",
27
+ "content_scrapper.gemspec",
28
+ "lib/content_scrapper.rb",
29
+ "lib/content_scrapper/content_mapping.rb",
30
+ "lib/content_scrapper/feedzirra.rb",
31
+ "rails/init.rb",
32
+ "test/helper.rb",
33
+ "test/test_content_mapping.rb",
34
+ "test/test_content_scrapper.rb",
35
+ "test/test_pages/pretty.html",
36
+ "test/test_pages/twocontent.html",
37
+ "test/test_pages/ugly.html"
38
+ ]
39
+ s.homepage = %q{http://github.com/fifigyuri/content_scrapper}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{Gem for those who want to screen scrap only the content part of web pages, blogs or articles.}
44
+ s.test_files = [
45
+ "test/test_content_mapping.rb",
46
+ "test/test_content_scrapper.rb",
47
+ "test/helper.rb"
48
+ ]
49
+
50
+ if s.respond_to? :specification_version then
51
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
52
+ s.specification_version = 3
53
+
54
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
56
+ s.add_development_dependency(%q<mocha>, [">= 0.9.8"])
57
+ s.add_runtime_dependency(%q<sanitize>, [">= 1.2.0"])
58
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
59
+ else
60
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
61
+ s.add_dependency(%q<mocha>, [">= 0.9.8"])
62
+ s.add_dependency(%q<sanitize>, [">= 1.2.0"])
63
+ s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
64
+ end
65
+ else
66
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 2.10.2"])
67
+ s.add_dependency(%q<mocha>, [">= 0.9.8"])
68
+ s.add_dependency(%q<sanitize>, [">= 1.2.0"])
69
+ s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
70
+ end
71
+ end
72
+
@@ -0,0 +1,62 @@
1
+
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'sanitize'
5
+
6
+ require 'content_scrapper/content_mapping'
7
+
8
# Scraps the content part of a web page and sanitizes it.
#
# An instance holds an ordered list of ContentMapping rules (registered with
# #content_mapping) and the settings passed to Sanitize (registered with
# #sanitize_tags). Both DSL methods can be called directly, through
# instance_eval, or from a configuration file evaluated on construction.
class ContentScrapper
  class << self
    # default_config_file:: path of the DSL file evaluated by every new
    #                       instance that is not given an explicit file;
    #                       nil disables loading.
    # default::             the scrapper registered as the shared default.
    attr_accessor :default_config_file, :default

    # Builds a new scrapper (arguments are forwarded to .new), registers it
    # as the default and returns it.
    def create_new_default(*args)
      self.default = new(*args)
    end
  end

  # Must go through the accessor on the class itself: a bare
  # `default_config_file = ...` inside `class << self` would only create a
  # local variable and leave the default path unset.
  self.default_config_file = "#{File.dirname(__FILE__)}/../config/content_scrapper.rb"

  attr_accessor :content_mappings, :sanitize_settings

  # scrapper_config_file:: optional path of a DSL file to evaluate; when
  #                        omitted, ContentScrapper.default_config_file is
  #                        used. No file is loaded when both are nil.
  #
  # Raises whatever File.read raises when the chosen file cannot be read.
  def initialize(scrapper_config_file = nil)
    @content_mappings = []
    config_file = scrapper_config_file || ContentScrapper.default_config_file
    # The file is executed as Ruby in the context of this instance, so it
    # must come from a trusted location.
    instance_eval(File.read(config_file), config_file) unless config_file.nil?
  end

  # Registers this instance as ContentScrapper.default and returns it.
  def set_as_default
    ContentScrapper.default = self
    self
  end

  # DSL: evaluates the block against a fresh ContentMapping and appends the
  # mapping to #content_mappings. Returns the list of mappings.
  def content_mapping(&block)
    new_mapping = ContentMapping.new
    new_mapping.instance_eval(&block)
    @content_mappings << new_mapping
  end

  # DSL: stores the settings later handed to Sanitize.clean.
  def sanitize_tags(settings)
    @sanitize_settings = settings
  end

  # Downloads +url+ and returns the sanitized content selected by the first
  # mapping that matches the url and has at least one XPath.
  #
  # Returns nil when no mapping applies or when the applicable mapping finds
  # no content. An error raised while handling one mapping is reported to
  # #scrap_content_exception and the next mapping is tried.
  def scrap_content(url)
    content_mappings.each do |mapping|
      next unless mapping.matches_url?(url) && !mapping.content_xpaths_list.empty?

      begin
        # NOTE(review): Kernel.open treats a string starting with "|" as a
        # command to run; the url is only validated by the mapping's pattern
        # here - confirm that patterns are anchored to a scheme.
        doc = Nokogiri::HTML(Kernel.open(url))
        content = mapping.scrap_content(doc)
        return nil if content.nil?

        # Fall back to an empty configuration when sanitize_tags was never
        # called, instead of handing nil to Sanitize.
        return Sanitize.clean(content, sanitize_settings || {})
      rescue StandardError => e
        # StandardError only: signals and SystemExit must keep propagating.
        scrap_content_exception(e)
      end
    end
    nil
  end

  # Hook invoked with every error rescued in #scrap_content. Does nothing by
  # default; override it to log or re-raise.
  def scrap_content_exception(exception)
  end
end
@@ -0,0 +1,29 @@
1
+
2
# A single scrapping rule: a URL pattern plus an ordered list of XPath
# expressions that may locate the content of pages matching that pattern.
#
# Instances are normally filled in through instance_eval using the DSL
# methods #url_pattern and #content_at.
class ContentMapping
  attr_reader :content_xpaths_list, :url_pattern_regexp

  def initialize
    @content_xpaths_list = []
  end

  # DSL: sets the pattern a url is matched against in #matches_url?.
  def url_pattern(pattern)
    @url_pattern_regexp = pattern
  end

  # DSL: appends an XPath expression; expressions are tried in the order
  # they were added. Returns the list of expressions.
  def content_at(content_xpath)
    @content_xpaths_list << content_xpath
  end

  # Returns true when +url+ matches the configured pattern, false otherwise
  # (including when no pattern was configured or +url+ is nil).
  def matches_url?(url)
    return false if @url_pattern_regexp.nil? || url.nil?

    # =~ yields the match index or nil; a predicate should not leak that.
    !(url =~ @url_pattern_regexp).nil?
  end

  # Returns the nodes selected by the first XPath expression that selects
  # anything in +doc+, converted to strings and joined with newlines.
  # Returns nil when no expression selects anything.
  #
  # doc:: any object responding to #xpath (the caller in this gem passes a
  #       Nokogiri document).
  def scrap_content(doc)
    @content_xpaths_list.each do |content_xpath|
      content_section = doc.xpath(content_xpath)
      return content_section.to_a.join("\n") unless content_section.empty?
    end
    nil
  end
end
@@ -0,0 +1,17 @@
1
+ # feedzirra entries are extended by methods for scrapping content
2
+ require 'feedzirra/feed_entry_utilities'
3
+
4
module Feedzirra
  # Extends feed entries with content scrapping. The methods rely on the
  # including entry providing #url, #content and #content=.
  module FeedEntryUtilities
    # Returns the content scrapped from the entry's url, or the entry's
    # existing content (as a String) when nothing could be scrapped.
    #
    # scrapper:: object responding to #scrap_content(url); defaults to
    #            ContentScrapper.default. When it is nil (no default was
    #            configured) the existing feed content is returned.
    def scrap_content(scrapper = ContentScrapper.default)
      scrapped = scrapper.nil? ? nil : scrapper.scrap_content(url)
      scrapped || content.to_s
    end

    # Same as #scrap_content, but also stores the result as the entry's
    # content. Returns the stored content.
    def scrap_content!(scrapper = ContentScrapper.default)
      # Must go through the writer: a bare `content = ...` would only
      # assign a local variable and leave the entry unchanged.
      self.content = scrap_content(scrapper)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'content_scrapper'
2
+
3
+ ContentScrapper.default_config_file = "#{RAILS_ROOT}/config/content_scrapper.yml"
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'content_scrapper'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,43 @@
1
+
2
+ require 'helper'
3
+ require 'mocha'
4
+
5
+ class TestContentMapping < Test::Unit::TestCase
6
+
7
+ context "on empty content mapping creation" do
8
+ setup do
9
+ @mapping = ContentMapping.new
10
+ @mapping.instance_eval do
11
+ url_pattern /^http:\/\/www\.matchme\.com\//
12
+ content_at '//div[@id="failing_content"]'
13
+ content_at '//div[@id="itext_content"]'
14
+ content_at '//div[@id="itext_second_content"]'
15
+ end
16
+ end
17
+ should "match the right urls" do
18
+ assert @mapping.matches_url?('http://www.matchme.com/')
19
+ end
20
+ should "not match the wrong urls" do
21
+ assert !@mapping.matches_url?('https://www.somethingelse.org/hfas')
22
+ end
23
+ context "scrapping content for a specific site" do
24
+ setup do
25
+ pretty_content = File.open("#{File.dirname(__FILE__)}/test_pages/pretty.html").read
26
+ @document = Nokogiri::HTML(pretty_content)
27
+ end
28
+ should "extract the content" do
29
+ assert_match(%r{<p><strong>This is a strong text</strong></p>}, @mapping.scrap_content(@document))
30
+ end
31
+ end
32
+ context "on document with two content parts" do
33
+ setup do
34
+ two_content = File.open("#{File.dirname(__FILE__)}/test_pages/twocontent.html").read
35
+ @document = Nokogiri::HTML(two_content)
36
+ end
37
+ should "evaluate the contents in the order as they were added" do
38
+ assert_match(%r{The first one is matched}, @mapping.scrap_content(@document))
39
+ end
40
+ end
41
+ end
42
+
43
+ end
@@ -0,0 +1,123 @@
1
+ require 'helper'
2
+ require 'mocha'
3
+
4
+ class TestContentScrapper < Test::Unit::TestCase
5
+
6
+ ContentScrapper.default_config_file = nil
7
+
8
+ context "on common setting" do
9
+ setup do
10
+ @scrapper = ContentScrapper.new
11
+ @scrapper.instance_eval do
12
+ content_mapping do
13
+ url_pattern /^http:\/\/www\.pretty\.url/
14
+ content_at '//div[@id="failing_content"]'
15
+ content_at '//div[@id="itext_content"]'
16
+ end
17
+
18
+ content_mapping do
19
+ url_pattern /^http:\/\/www\.twopatterns\.url/
20
+ content_at '//div[@id="failing_content"]'
21
+ content_at '//div[@id="itext_content"]'
22
+ end
23
+
24
+ content_mapping do
25
+ url_pattern /^http:\/\/www\.twopatterns\.url/
26
+ content_at '//div[@id="itext_second_content"]'
27
+ end
28
+
29
+ sanitize_tags ({:elements => ['p','br', 'b', 'em', 'i', 'strong', 'u', 'a', 'h1', 'h2', 'h3', 'li', 'ol', 'ul'], \
30
+ :attributes => { 'a' => ['href'] }})
31
+ end
32
+ end
33
+
34
+ context "for known sources with expected content scrapping" do
35
+ setup do
36
+ pretty_content = File.open("#{File.dirname(__FILE__)}/test_pages/pretty.html").read
37
+ stringio = StringIO.new(pretty_content)
38
+ Kernel.expects(:open).returns(stringio)
39
+ @entry_content = @scrapper.scrap_content('http://www.pretty.url/fsdsd')
40
+ end
41
+ should("identify the content") do
42
+ assert_match(%r{<p><strong>This is a strong text</strong></p>}, @entry_content)
43
+ end
44
+ end
45
+
46
+ context "for known pages with unexpected content scrapping" do
47
+ setup do
48
+ ugly_content = File.open("#{File.dirname(__FILE__)}/test_pages/ugly.html").read
49
+ stringio = StringIO.new(ugly_content)
50
+ Kernel.expects(:open).returns(stringio)
51
+ @entry_content = @scrapper.scrap_content('http://www.pretty.url/hsdae')
52
+ end
53
+ should("return nil") { assert_nil @entry_content }
54
+ end
55
+
56
+ context "for unknown pages" do
57
+ setup { @entry_content = @scrapper.scrap_content('http://www.unknown.url/hsdae') }
58
+ should("return nil") { assert_nil @entry_content }
59
+ end
60
+
61
+ context "multiple matching url patterns" do
62
+ setup do
63
+ twocontent = File.open("#{File.dirname(__FILE__)}/test_pages/twocontent.html").read
64
+ stringio = StringIO.new(twocontent)
65
+ Kernel.expects(:open).with('http://www.twopatterns.url').returns(stringio)
66
+ @entry_content = @scrapper.scrap_content('http://www.twopatterns.url')
67
+ end
68
+ should "match the first content" do
69
+ assert_equal 'The first one is matched', @entry_content
70
+ end
71
+ end
72
+
73
+ context "on scrapping with feedzirra" do
74
+ setup do
75
+ require 'content_scrapper/feedzirra'
76
+ require 'sax-machine'
77
+ require 'feedzirra/parser/rss_entry'
78
+ require 'feedzirra/parser/atom_entry'
79
+ end
80
+
81
+ context "feed entry with not parsable remote content, but with feed content set" do
82
+ setup do
83
+ @feed_entries = [ Feedzirra::Parser::RSSEntry.new, Feedzirra::Parser::AtomEntry.new ]
84
+ @feed_entries.each do |feed_entry|
85
+ feed_entry.url = 'http://www.unknown.url/wedhsf'
86
+ feed_entry.content = 'Pretty well written content is this.'
87
+ end
88
+ Kernel.expects(:open).with('http://www.unknown.url/wedhsf').never
89
+ end
90
+ should("return the original feed content") do
91
+ @feed_entries.each do |feed_entry|
92
+ assert_equal 'Pretty well written content is this.', feed_entry.scrap_content(@scrapper)
93
+ feed_entry.scrap_content!(@scrapper)
94
+ assert_equal 'Pretty well written content is this.', feed_entry.content
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
100
+
101
+ context "on setting default content scrapper" do
102
+ setup { @scrapper = ContentScrapper.create_new_default }
103
+ should "set the default to the recently created" do
104
+ assert_equal @scrapper, ContentScrapper.default
105
+ end
106
+ context "when changing default content scrapper" do
107
+ setup { @new_scrapper = ContentScrapper.new.set_as_default }
108
+ should "change the default to the new content scrapper" do
109
+ assert_equal @new_scrapper, ContentScrapper.default
110
+ end
111
+ end
112
+ context "for feed entry" do
113
+ setup do
114
+ @feed_entry = Feedzirra::Parser::RSSEntry.new
115
+ @feed_entry.url = 'http://www.unknown.url/gerhe'
116
+ @feed_entry.content = 'We should get this.'
117
+ end
118
+ should("scrap content by the default scrapper") do
119
+ assert_equal 'We should get this.', @feed_entry.scrap_content
120
+ end
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,17 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml">
2
+ <head>
3
+ <title>Pretty title</title>
4
+ </head>
5
+ <body>
6
+ <div class="articlec col">
7
+ <div id="itext_content">
8
+ <p><strong>This is a strong text</strong></p><br/>
9
+ SOMEWHERE. Things happened...
10
+ <p>paragraph1</p>
11
+ <p>paragraph2</p>
12
+ <h3>Section</h3>
13
+ <p>paragraph3</p>
14
+ <p>paragraph4</p>
15
+ <p>paragraph5</p>
16
+ </body>
17
+ </html>
@@ -0,0 +1,11 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml">
2
+ <head>
3
+ <title>Pretty title</title>
4
+ </head>
5
+ <body>
6
+ <div class="articlec col">
7
+ <div id="itext_second_content">The second pattern should not be matched</div>
8
+ <div id="itext_content">The first one is matched</div>
9
+ </div>
10
+ </body>
11
+ </html>
@@ -0,0 +1,399 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml">
3
+ <head>
4
+ <link rel="canonical" href="http://veda.sme.sk/c/5030948/zijeme-vo-veku-hlupakov.html" />
5
+ <meta http-equiv="Content-Type" content="text/html; charset=windows-1250" />
6
+ <meta name="description" content="Zd� sa, �e v prebiehaj�cej diskusii o kl�me chc� v�etci svetu iba dobre. Pre�o potom pad� to�ko ostr�ch slov?" />
7
+ <meta name="keywords" content="veda, technika, botanika, zool�gia, medic�na, psychol�gia, sociol�gia, dinosaury" />
8
+ <meta name="copyright" content="Petit Press, a.s." />
9
+ <meta name="author" content="SME - Petit Press, a.s." />
10
+ <meta name="revisit-after" content="1 days" />
11
+ <meta name="classification" content="Media" />
12
+ <meta name="distribution" content="Global" />
13
+ <meta name="rating" content="General" />
14
+ <meta name="doc-type" content="Web Page" />
15
+ <meta name="doc-class" content="Published" />
16
+ <meta name="doc-rights" content="Copywritten Work" />
17
+ <meta name="doc-publisher" content="Petit Press, a.s." />
18
+ <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
19
+ <link rel="alternate" type="application/rss+xml" title="RSS" href="http://rss.sme.sk/rss/rss.asp?sek=veda" />
20
+ <title>�ijeme vo veku hlup�kov? | Ekol�gia | veda.sme.sk</title>
21
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_layout.css?rev=19" />
22
+ <link rel="stylesheet" type="text/css" href="http://veda.sme.sk/css/sme_sub_layout.css?rev=24" />
23
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_header.css" />
24
+ <link rel="stylesheet" type="text/css" href="http://veda.sme.sk/css/sme_sub_header.css" />
25
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_article.css?rev=8" />
26
+ <link rel="stylesheet" href="http://www.sme.sk/css/sme_article2.css?rev=4" type="text/css" />
27
+ <link rel="stylesheet" href="http://www.sme.sk/css/sme_article_hist.css" type="text/css" />
28
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/main.js?rev=19"></script>
29
+ <script type="text/javascript" src="http://veda.sme.sk/storm_real/jscript/anketa.js?rev=18"></script>
30
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/x.js"></script>
31
+ <script type="text/javascript" src="http://veda.sme.sk/jscript/my_x.js"></script>
32
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/storm_real/pager/pager.css" />
33
+ <link rel="stylesheet" type="text/css" href="http://www.sme.sk/css/sme_pager.css" />
34
+ <script type="text/javascript" src="http://www.sme.sk/storm_real/pager/pager_common.js"></script>
35
+ <script type="text/javascript" src="http://www.sme.sk/storm_real/pager/pager.js"></script>
36
+ </head>
37
+ <body onload="resizeLogo(); ">
38
+ <div class="top-ban"><div id='bmone2n-2337.47.2.64'>
39
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/2/64/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/2/64/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
40
+ </div>
41
+ </div>
42
+ <div id="logo">
43
+ <a href="http://www.sme.sk/"><img src="http://www.sme.sk/imgs/logo4.gif" width="139" height="39" alt="SME logo" /></a>
44
+ <!-- header -->
45
+ <div id="pager">
46
+ <div id="cita">��ta 23�684 �ud�</div>
47
+ <script type="text/javascript">
48
+ pager_generate(pager_commonPager, 0);
49
+ </script>
50
+ </div>
51
+ <!-- END header -->
52
+ </div>
53
+ <!-- header HS/PS -->
54
+ <div id="headerw">
55
+ <a href="http://veda.sme.sk"><img id="logoImg" src="http://veda.sme.sk/imgs/logo2/veda.gif" alt="logo" /></a>
56
+ </div>
57
+ <!-- END header -->
58
+ <div id="pagew">
59
+ <!-- navigacia & search -->
60
+ <div id="sroww">
61
+ <div id="sr_left"><a href="http://www.sme.sk">Sme.sk</a> | <a href="/">Veda</a> | <a href="/r/veda_eko/ekologia.html">Ekol�gia</a> | �ijeme vo veku hlup�kov?</div>
62
+ <div id="sr_searchw">
63
+ <div id="search">
64
+ <!-- SiteSearch Google -->
65
+ <form method="get" action="/gsearch/" target="_top">
66
+ <input type="hidden" name="domains" value="veda.sme.sk"></input>
67
+ <input id="sbi" name="q" type="text" value=" vyh�ad�vanie" onFocus="if(this.value==' vyh�ad�vanie') this.value='';" class="box" />
68
+ <label><input name="sitesearch" type="radio" value="veda.sme.sk" style="font-size: 5px" checked />v tejto sekcii</label>
69
+ <label><input name="sitesearch" type="radio" value="sme.sk" style="font-size: 5px" />v celom SME</label>
70
+ <input id="sbb" type="image" src="http://www.sme.sk/imgs/find_go.gif" width="20" height="17" align="absmiddle" />
71
+ <input type="hidden" name="client" value="pub-6456364300478111"></input>
72
+ <input type="hidden" name="forid" value="1"></input>
73
+ <input type="hidden" name="channel" value="5047212324"></input>
74
+ <input type="hidden" name="ie" value="windows-1250"></input>
75
+ <input type="hidden" name="oe" value="windows-1250"></input>
76
+ <input type="hidden" name="cof" value="GALT:#008000;GL:1;DIV:#D00000;VLC:663399;AH:center;BGC:FFFFFF;LBGC:D00000;ALC:D00000;LC:D00000;T:000000;GFNT:D00000;GIMP:D00000;FORID:11"></input>
77
+ <input type="hidden" name="hl" value="sk"></input>
78
+ </form>
79
+ <!-- SiteSearch Google -->
80
+ </div>
81
+ </div>
82
+ </div>
83
+ <!-- END navigacia -->
84
+ <!-- hlavny kontent -->
85
+ <div id="mainw" class="col2">
86
+ <!-- stredny + pravy stlpec -->
87
+ <div id="col2w" class="col2">
88
+ <!-- hlavny obsah stedneho stlpca -->
89
+ <div id="contentw">
90
+ <!-- storm-c-box -->
91
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/euroConvertor_data.js?rev=6-25"></script>
92
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/euroConvertor2.js?rev=10"></script>
93
+ <link rel="stylesheet" href="http://www.sme.sk/storm/css/euroConvertor2.css?rev=2" type="text/css" />
94
+
95
+ <!-- eTarget ContextAd Start -->
96
+ <div id="contenth" class="art">
97
+ <div class="options"><a href="/clanok_tlac.asp?cl=5030948"><img src="/storm/imgs/toolbar/tlac.gif" title="vytla�i�" border="0" /><span>VYTLA�I�</span></a> <a href="/diskusie/reaction_show.php?id_extern_theme=5030948&extern_type=sme-clanok"><img src="/storm/imgs/toolbar/koment.gif" title="diskutujte" border="0" /><span>diskutujte</span></a> <a href="#" onclick="send_err_info('art', 5030948);"><img src="/storm/imgs/toolbar/chyba.gif" title="Upozornite na chybu" border="0" /><span>UPOZORNITE NA CHYBU</span></a> <span class="sharespan">Po�lite:</span>
98
+ <a href="/clanok_tool.asp?t=odp&cl=5030948" class="sharelink"><img src="/storm/imgs/toolbar/email.gif" title="po�lite e-mailom" border="0" /><span>E-MAILOM</span></a> <a href="javascript:location.href='http://www.facebook.com/share.php?u='+encodeURIComponent(location.href);" onclick="return art_poslite_click('fb')" class="sharelink"><img src="/storm/imgs/toolbar/fb.gif" border="0" title="prida� na facebook" ><span>na facebook</span></a>
99
+ <a href="javascript:location.href='http://vybrali.sme.sk/sub.php?url='+encodeURIComponent(location.href);" class="sharelink" onclick="return art_poslite_click('vbsme');"><img src="/storm/imgs/toolbar/doasdf_c.gif" title="prida� na vybrali.sme.sk" border="0" /><span>VYBRALI.SME</span></a> | <a href="#" class="sharelink" onmouseover="sharelinkOver()" onmouseout="sharelinkOut()"><span>�al�ie</span><img src="/storm/imgs/toolbar/share_arrow.gif" alt="+" border="0"></a>
100
+ <div class="share_more" id="share_more" onmouseover="sharemoreOver();" onmouseout="sharemoreOut();">
101
+ <ul>
102
+ <li><a href="http://delicious.com/save" onclick="return art_poslite_click('del')" class="sharelink"><img src="/storm/imgs/toolbar/delicious.gif" title="prida� na delicious" border="0"><span>delicious.com</span></a></li>
103
+ <li><a href="http://www.google.com/bookmarks" onclick="return art_poslite_click('g')" class="sharelink"><img src="/storm/imgs/toolbar/google.gif" title="prida� na google" border="0"><span>Google Bookmarks</span></a></li>
104
+ <li><a href="http://www.myspace.com/Modules/PostTo/Pages/" onclick="return art_poslite_click('myspc')" class="sharelink"><img src="/storm/imgs/toolbar/myspc.gif" title="prida� na myspace" border="0"><span>MySpace</span></a></li>
105
+ <li><a href="http://www.twitter.com/home?status=" onclick="return art_poslite_click('twit')" class="sharelink"><img src="/storm/imgs/toolbar/twit.gif" title="prida� na twitter" border="0"><span>Twitter</span></a></li>
106
+ </ul>
107
+ </div>
108
+ </div>
109
+ <h1>�ijeme vo veku hlup�kov?</h1>
110
+ </div>
111
+ <div class="articlec col">
112
+ <div id="CORRUPTED_HERE_itext_content">
113
+ <p><strong>Zd� sa, �e v prebiehaj�cej diskusii o kl�me chc� v�etci svetu iba dobre. Pre�o potom pad� to�ko ostr�ch slov?</strong><br /><br />BRATISLAVA. V pondelok mal v bratislavskom A4 - Nultom priestore premi�ru film Franny Armstrongovej The Age of Stupid (Vek hlup�kov), odohr�vaj�ci sa v roku 2055 po glob�lnom jadrovom konflikte, vyvolanom zmenou kl�my.</p>
114
+ <p>Star� mu�, ktor� �ije v zni�enom svete, si pozer� z�bery z filmov�ho arch�vu z na�ich �ias a p�ta sa: Pre�o sme nezastavili klimatick� zmenu, ke� bol e�te �as?</p>
115
+ <p>Odpove� znie, �e �udstvo sa odjak�iva spr�valo hl�po, d�kazom �oho s� pretrv�vaj�ce vojny o majetky a neschopnos� pozrie� sa dopredu. Ani na za�iatku 21. storo�ia zo svojich zvyklost� nevybo�ilo.<br />�o in� teda mohli hlup�ci �aka�, ke� si tak systematicky p�lili pod sebou kon�r?</p>
116
+ <h3>Dva scen�re</h3>
117
+ <p>V�etko sa zdalo tak� jednoduch�. Ak je pravda, �e kl�mu men�� �lovek, sta�ilo obmedzi� spa�ovanie uhlia a ropy, pr�padne sa nau�i� odb�rava� oxid uhli�it� � a <a href="http://www.sme.sk/storm/itextg.asp?idh=4791644&ids=6" class="itext_link">glob�lne otep�ovanie</a> by sa (�asom) zastavilo. Hlup�ci to v�ak neboli schopn� urobi�.</p>
118
+ <p style="text-align: center;"><img src="http://i.sme.sk/vydania/20090924/photo2/sm-0924-026c-kniha.rw.jpg" /></p>
119
+ <p style="text-align: center;"><span style="font-size: x-small;">Lord Lawson nap�sal knihu o glob�lnom otep�ovan� �triezvo a bez em�ci�, hoci s�m sa em�ci�m v�dy nevyhol.<br />P�e, �e veda o kl�me je iba jedn�m aspektom glob�lneho otep�ovania. D�le�it� je rozozna�, ak� opatrenia<br />s� politicky re�lne. Urobia� politici spr�vne rozhodnutie? </span></p>
120
+ <p>Teraz predlo�me in� scen�r, v ktorom tie� hraj� hlavn� �lohu nerozumn� �udia, tentoraz z knihy britsk�ho ekon�ma Nigela Lawsona Vr�me sa k rozumu (v �eskom preklade Petra Hol��ka, Doko��n, <a href="http://www.sme.sk/storm/itextg.asp?idh=4791640&ids=7" class="itext_link">Praha</a> 2009).<br />Lord Lawson of Blaby, minister vo vl�de Margaret Thatcherovej kon�tatuje, �e svet netreba zachra�ova� pred glob�lnym otep�ovan�m, ale pred t�mi, ktor� preh��aj� jeho nebezpe�enstvo.</p>
121
+ <p>�Nov� n�bo�enstvo glob�lneho otep�ovania... obsahuje zrnko pravdy a obrovsk� kopu nezmyslov... Zd� sa, �e sme vst�pili do nov�ho veku nerozumu, �o je hlboko znepokojuj�ce.�</p>
122
+ <p>Lawsonova poz�cia je zalo�en� na ve�mi optimistickom presved�en�, �e s��asn� klimatick� po��ta�ov� modely nemo�no prece�ova�, lebo pracuj� s pr�li� zlo�it�m a� chaotick�m syst�mom.</p>
123
+ <p>Nem�me teda istotu, �e glob�lne teploty sa bud� �alej zvy�ova�.</p>
124
+ <h3>Na�o pom�ha� bohat�m?</h3>
125
+ <p>Lawson v�ak prip���a, �e otep�ovanie m��e pokra�ova�. V tom pr�pade treba zv�i� n�klady, ktor� prinesie zni�ovanie emisi�, a pr�nosy, ktor� z toho pre na�ich potomkov vyplyn�.</p>
126
+ <p>Ke�e Lawsonovi je jasn�, �e v glob�le svet speje k v��iemu bohatstvu, je nerozumn� zajtra vynalo�i� obrovsk� �iastky na nov� technol�gie iba preto, aby bohat� �udia koncom tohto storo�ia boli iba o trochu chudobnej�� (aj tak im hroz� nieko�kon�sobne v��ie bohatstvo ako m�me k dispoz�cii my dnes).</p>
127
+ <p>�al��m argumentom je postoj� ��ny a Indie. Obe krajiny maj� obrovsk� z�soby uhlia a obe chc� pre svojich �ud� vy��iu �ivotn� �rove�, ak� si po�as priemyselnej revol�cie vybudovali s vyu�it�m fos�lnych pal�v Eur�pa a Severn� Amerika.</p>
128
+ <p>Taktie� nie je jednoduch� rozhodn��, ktor� z mo�n�ch glob�lnych hrozieb (terorizmus,� celosvetov� pand�mia, dopad asteroidu) je v porovnan� s glob�lnym otep�ovan�m naliehavej�ia; spom�na knihu s��asn�ho predsedu britskej Kr�ovskej spolo�nosti Martina Reesa Na�a posledn� hodina, z ktorej rozhodne nevypl�va, �e by svet mal prednostne rie�i� pr�ve glob�lne otep�ovanie.</p>
129
+ <p>V re�lnom svete m�me kone�n� zdroje a mus�me ur�ova� priority, tomuto sa unikn�� ned�, p�e Lawson.<br />�V�znam by mohla ma� iba z�sadn� zmena n�ho sp�sobu �ivota, ak� by priniesol podstatn� n�rast cien energie; ten je nevyhnutn� nielen preto, aby sme jej spotreb�vali menej, ale aj preto, aby sa bezuhl�kov� energia stala konkurencieschopnou.�</p>
130
+ <p>�</p>
131
+ <div class="clanok-plus">
132
+ <div class="clanok-plus-nadpis">�erven� �iara nad kl�mou?</div>
133
+ <p><strong>Film Vek hlup�kov stoj� za to, aby sme si ho pozreli. Za pre��tanie stoj� aj kniha lorda Lawsona.</strong><br /><br />Jeden z div�kov k filmu The Age of Stupid povedal: �Vedci, filmov� producenti a nakoniec aj �ir�ia verejnos� si za��naj� pomaly uvedomova� osudov� prepojenos� a zlo�itos� s��asn�ho sveta. Film ukazuje, �e ka�dodenn� �ivot �ud� m��e ma� ve�mi v�znamn� dopady na �ivoty �ud� na opa�nej strane sveta. Hoci sa zameriava najm� na kl�mu, jeho posolstvo je hlb�ie; sna�� sa nazna�i�, �e s na��m s��asn�m poh�adom na pr�rodu a svet nie je v�etko v poriadku.�</p>
134
+ <p>Kniha lorda Lawsona Vr�me sa k rozumu je zas pozoruhodn� najm� t�m, �e analyzuje glob�lne otep�ovanie z praktickej str�nky. Zrejme si naozaj m�lokto uvedomuje, �o v�etko bude treba obetova�, aby sme dok�zali obmedzi� emisie z ropy a uhlia nato�ko, aby sa prestal zvy�ova� obsah oxidu uhli�it�ho v ovzdu��.</p>
135
+ <p>Autor s�ubuje poh�ad bez em�ci�, preto je �koda, �e na z�ver stoto��uje bojovn�kov proti oxidu uhli�it�mu s marxistami, ktor� ke� zistili, �e sa komunizmus skon�il, sa r�chlo prefarbili na zeleno.</p>
136
+ <p>Je to nef�r �vaha, najm� ke� oxidom uhli�it�m kedysi argumentovala aj Lawsonova ��fka, ��elezn� lady� Margaret Thatcherov� vo svojom spravodlivom boji proti rebeluj�cim ban�kom.</p>
137
+ <p>�Alarmisti� v�ak m��u kontrova�: ak rozv�ny Lawson navrhuje v�bec nejak� opatrenia, potom najm� tak�, ak�ch bol schopn� u� nevzdelan� rusk� mu�ik v �asoch Petra I. (hovor� sa im adapt�cia): ke� bude zima, d�m si ko�uch. Ke� bude teplo, tak ho vyzle�iem. Nu� a ak bude ve�a vody, navle�iem si vy��ie gum�ky.</p>
138
+ <div class="clanok-plus-autor">(a�)</div>
139
+ </div>
140
+ <div class="clanok-plus">
141
+ <div class="clanok-plus-nadpis">Energia a Parkinsonove z�kony</div>
142
+ <p><strong>N�jdu sa spolo�n� stanovisk� pri spolo�nej snahe uplat�ova� n�zkouhl�kov� energetick� zdroje?</strong><br /><br />BRATISLAVA. Parkinsonov z�kon rovnosti expertov hovor�, �e na ka�d�ho experta pripad� in� expert s opa�n�m n�zorom.</p>
143
+ <p>Uk�zalo sa to aj v diskusii po filme The Age of Stupid v bratislavskom V-klube, ktor� vtipne moderoval Juraj Rizman zo slovenskej pobo�ky Greenpeace.</p>
144
+ <p style="text-align: center;"><img src="http://i.sme.sk/vydania/20090924/photo2/sm-0924-026c-ropnaveza.rw.jpg" /></p>
145
+ <p style="text-align: center;"><span style="font-size: x-small;">Symbol glob�lnej civiliz�cie � ropn� vrtn� ve�a. ILUSTRA�N� FOTO � TASR/AP</span></p>
146
+ <p>Traja z�stancovia nevyhnutnosti obmedzi� obsah uhl�ka vo vzduchu sa v nej nemohli zhodn��, ako to re�lne dosiahnu�: vyu��va� viac vetern� a slne�n� energiu a energiu biomasy alebo aj nepopul�rnu jadrov�?<br />Je jasn�, �e ve�mi v�tan�mi zdrojmi s� aj vietor (pozrite sa do Rak�ska, do Ma�arska �i do Ve�kej Brit�nie), aj Slnko (pozrite sa do Nemecka). Zatia� v�ak vo svete aj u n�s tvoria iba okrajov� doplnok energie, z�skavanej z klasick�ch zdrojov.</p>
147
+ <p>No a ak sa pozriete treb�rs do Franc�zska, zist�te, �e zabezpe�uje v��inu elektrickej energie z jadrov�ch reaktorov.</p>
148
+ <p>Zrejme v�aka v�asn�mu n�stupu hne� po druhej svetovej vojne, ke� e�te jadrov� reaktory bu� neboli tak� drah�, alebo tak� hroziv�.</p>
149
+ <p>�o teda potrebujeme, ak berieme klimatick� zmenu v�ne? Zrejme v�etky tieto zdroje. Ide�lne by bolo, keby raz nastal s�hlas expertov aspo� v tomto.</p>
150
+ <p>Preto�e ak sa nevedia dohodn�� traja �udia z jednej krajiny, ktor�m ide o spolo�n� vec, ako sa maj� dohodn�� tis�cky expertov zo stoviek kraj�n, z�ujmy ktor�ch s� ve�mi r�znorod�?</p>
151
+ <p>Ke�e z�stupcovia vl�dnych in�tit�ci� pozvanie na diskusiu vo �v��ku� ignorovali, vraj ako v�dy, nedozvedeli sme sa, �o maj� v skuto�nosti v pl�ne oni, ak v�bec nie�o.</p>
152
+ <div class="clanok-plus-autor">(a�)</div>
153
+ </div>
154
+ </div>
155
+ <!-- eTarget ContextAd End -->
156
+
157
+ <p class="autor_line"><b>�tvrtok 24. 9. 2009</b> | <a href="http://www.sme.sk/autor_info.asp?id=58">Michal A�</a><br /><span class="copyr_sme">�l�nok bol uverejnen� v tla�enom vydan� SME. (<a href="http://predplatne.sme.sk">Predpla�te si SME cez internet.</a>)</span><br /><span class="copyr"><a href="#" onClick="st_openWindow('/footer/', 'PetitPress','width=650,height=550'); return false;">&copy 2009 Petit Press. Autorsk� pr�va s� vyhraden� a vykon�va ich vydavate�. Spravodajsk� licencia vyhraden�.</a></span></p>
158
+ <div class="cb"></div>
159
+ <div class="cb"></div>
160
+ <div class="article-etarget">
161
+ <iframe id="etarget_iframe" src="/storm_real/extra/etarget/etarget_load2.asp?q=5030948&idsek=veda&idrub=veda_eko&nazrub=Ekol%F3gia&l=3&nobr=1&st_odb=sme5&st_cssr=1" border="0" width="100%" height="1" hspace="0" vspace="0" SCROLLING="no" BORDERCOLOR="#FFFFFF" MARGINWIDTH="0" MARGINHEIGHT="0" FRAMEBORDER="0"></iframe>
162
+ </div>
163
+ <div>
164
+ <script type="text/javascript" src="http://diskusie.sme.sk/diskusie/extern_action.php?action=get&id_extern_theme=5030948&extern_type=sme-clanok&limit=5&domain=veda.sme.sk"></script>
165
+ </div>
166
+ <div class="cb"></div>
167
+ </div>
168
+ <script type="text/javascript">
169
+ storm_pg_stat_hit('art', 5030948, '', 1);
170
+ </script>
171
+ <!-- koniec hlavneho obsahu clanku -->
172
+ <!-- FLOATBOX pravy - tu sa rusi nutnost davat cely "floatboxw" do <p></p> -->
173
+ <div id="floatboxw">
174
+ <div class="floatbox extra">
175
+ <p><img src="http://i.sme.sk/cdata/8/50/5030948/otva-r773.jpg" alt="Smog je probl�mom prakticky v�etk�ch kraj�n. Tento z�ber je zo severn�ch �iech." /></p>
176
+ <p>Smog je probl�mom prakticky v�etk�ch kraj�n. Tento z�ber je zo severn�ch �iech.</p>
177
+ <p>Foto: ILUSTRA�N� � �TK</p>
178
+ </div>
179
+ <div class="floatbox"><div id='bmone2n-2337.47.6.20'>
180
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/6/20/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/6/20/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
181
+ </div>
182
+ </div>
183
+ <!-- zaciatok TABy -->
184
+ <!-- storm-art-najcit-box -->
185
+ <div class="floatbox tabbed">
186
+ <div><img src="/imgs/box/najcit_cl.gif" style="top: 0px; right: 0px; border: 0px" alt="naj��tanej�ie" /></div>
187
+ <div class="c3blok">
188
+ <div class="tabs">
189
+ <ul><li class="ac" id="tab1_1hod_button" onclick='tabButtonClicked("tab1", "1hod")'>4 hodiny</li><li class="ia" id="tab1_2hod_button" onclick='tabButtonClicked("tab1", "2hod")'>24h</li><li class="ia" id="tab1_3hod_button" onclick='tabButtonClicked("tab1", "3hod")'>3dni</li><li class="ia" id="tab1_4hod_button" onclick='tabButtonClicked("tab1", "4hod")'>7dn�</li></ul>
190
+ </div>
191
+ <div class="tabsc" id="tab1_1hod_content">
192
+ <ol>
193
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 926</li>
194
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 514</li>
195
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 187</li>
196
+ <li><a href="/c/5032517/nasli-najvacsi-anglosasky-poklad.html">Na�li najv��� anglosask� poklad</a> 142</li>
197
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 85</li>
198
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 80</li>
199
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 48</li>
200
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 47</li>
201
+ <li><a href="/c/5028949/ulovili-sestmetroveho-morskeho-kraka-len-druhy-raz.html">Ulovili �es�metrov�ho morsk�ho kraka. Len druh� raz</a> 46</li>
202
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 41</li>
203
+ </ol>
204
+ </div>
205
+ <div class="tabsc" style="display: none" id="tab1_2hod_content">
206
+ <ol>
207
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�487</li>
208
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
209
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
210
+ <li><a href="/c/5032517/nasli-najvacsi-anglosasky-poklad.html">Na�li najv��� anglosask� poklad</a> 3�275</li>
211
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 2�806</li>
212
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 2�199</li>
213
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 2�126</li>
214
+ <li><a href="/c/5028949/ulovili-sestmetroveho-morskeho-kraka-len-druhy-raz.html">Ulovili �es�metrov�ho morsk�ho kraka. Len druh� raz</a> 1�929</li>
215
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 1�825</li>
216
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 1�147</li>
217
+ </ol>
218
+ </div>
219
+ <div class="tabsc" style="display: none" id="tab1_3hod_content">
220
+ <ol>
221
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�980</li>
222
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5027440/vtak-ktory-jedol-ludi-jestvoval.html">Vt�k, ktor� jedol �ud�, jestvoval</a> 14�645</li>
223
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 13�155</li>
224
+ <li><a href="/c/5028827/objavili-syrsky-kralovsky-poklad.html">Objavili s�rsky kr�ovsk� poklad</a> 11�819</li>
225
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
226
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
227
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 10�797</li>
228
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 9�313</li>
229
+ <li><a href="/c/5029357/objavili-novu-cast-velkeho-cinskeho-muru.html">Objavili nov� �as� Ve�k�ho ��nskeho m�ru</a> 6�772</li>
230
+ <li><a href="/c/5028991/ako-vyzera-stred-nasej-galaxie.html">Ako vyzer� stred na�ej galaxie?</a> 6�185</li>
231
+ </ol>
232
+ </div>
233
+ <div class="tabsc" style="display: none" id="tab1_4hod_content">
234
+ <ol>
235
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5027440/vtak-ktory-jedol-ludi-jestvoval.html">Vt�k, ktor� jedol �ud�, jestvoval</a> 26�503</li>
236
+ <li><a href="/c/5030948/zijeme-vo-veku-hlupakov.html">�ijeme vo veku hlup�kov?</a> 17�980</li>
237
+ <li><a href="/c/5030764/dnesny-clovek-vznikol-vdaka-vareniu-nie-sexu.html">Dne�n� �lovek vznikol v�aka vareniu. Nie sexu</a> 13�155</li>
238
+ <li><a href="/c/5028827/objavili-syrsky-kralovsky-poklad.html">Objavili s�rsky kr�ovsk� poklad</a> 11�819</li>
239
+ <li><a href="/c/5031698/na-mesiaci-je-voda-potvrdili-to-viacere-misie.html">Na Mesiaci je voda. Potvrdili to viacer� misie</a> 11�513</li>
240
+ <li><a href="/c/5031672/vyvinuli-vakcinu-ktora-dokaze-bojovat-s-hiv.html">Vyvinuli vakc�nu, ktor� dok�e bojova� s HIV</a> 11�012</li>
241
+ <li><a href="/c/5030792/nemcov-oslnili-solarne-panely.html">Nemcov oslnili sol�rne panely</a> 10�797</li>
242
+ <li><img src="/storm/imgs/ico_video_1s.gif" class="ico_uni_l" /><a href="/c/5031325/pandy-by-mali-vymriet-tvrdi-to-prirodovedec.html">Pandy by mali vymrie�. Tvrd� to pr�rodovedec</a> 9�313</li>
243
+ <li><a href="/c/5016600/zomrel-zachranca-miliardy-ludi.html">Zomrel z�chranca miliardy �ud�</a> 7�605</li>
244
+ <li><a href="/c/5029357/objavili-novu-cast-velkeho-cinskeho-muru.html">Objavili nov� �as� Ve�k�ho ��nskeho m�ru</a> 6�772</li>
245
+ </ol>
246
+ </div>
247
+ </div>
248
+ <!--###stp1###-->
249
+ </div>
250
+ <!-- storm-art-najcit-box-end -->
251
+ <!-- end TABy -->
252
+ <div class="floatbox tabbed"><div style="margin: 2px 2px 2px 8px;"><iframe id="netsuccess_stip_iframe" src="/storm_real/extra/nss_tip/show.asp?id=1-10" border="0" width="300" height="1" hspace="0" vspace="0" SCROLLING="no" BORDERCOLOR="#FFFFFF" MARGINWIDTH="0" MARGINHEIGHT="0" FRAMEBORDER="0"></iframe></div></div>
253
+ </div>
254
+ <!-- koniec floatboxw -->
255
+ <div class="cb"></div>
256
+ <script type="text/javascript" src="http://www.sme.sk/storm/jscript/clanok.js?rev=10"></script>
257
+
258
+ <script type="text/javascript">
259
+ addLoadEvent(function(){searchSk();});
260
+ </script>
261
+ <!-- storm-c-box-end -->
262
+ </div>
263
+ <!-- END hlavny obsah stedneho dvojstlpca -->
264
+ </div>
265
+ <!-- END stredny stlpec -->
266
+ <!-- lavy stlpec -->
267
+ <div id="col1w">
268
+ <a href="http://www.sme.sk"><img src="/imgs/title_back2.gif" width="125" height="31" style="border: 0px" alt="sp� na SME.sk" /></a><div id="mainmenu"><ul>
269
+ <li class="menu_section_home"><a href="/" id="st_bm_cat_ext_324">Veda</a></li>
270
+ <li><a href="/r/veda_matema/matematika.html" id="st_bm_cat_veda_matema">Matematika</a></li>
271
+ <li><a href="/r/veda_aktual/aktuality.html" id="st_bm_cat_veda_aktual">Aktuality</a></li>
272
+ <li><a href="/r/veda_vesmir/vesmir.html" id="st_bm_cat_veda_vesmir">Vesm�r</a></li>
273
+ <li><a href="/r/veda_fyzika/fyzika.html" id="st_bm_cat_veda_fyzika">Fyzika</a></li>
274
+ <li><a href="/r/veda_botanik/botanika.html" id="st_bm_cat_veda_botanik">Botanika</a></li>
275
+ <li><a href="/r/veda_dinosau/dinosaury.html" id="st_bm_cat_veda_dinosau">Dinosaury</a></li>
276
+ <li><a href="/r/veda_eko/ekologia.html" id="st_bm_cat_veda_eko">Ekol�gia</a></li>
277
+ <li><a href="/r/veda_histor/historia.html" id="st_bm_cat_veda_histor">Hist�ria</a></li>
278
+ <li><a href="/r/veda_psych/psychologia.html" id="st_bm_cat_veda_psych">Psychol�gia</a></li>
279
+ <li><a href="/r/veda_medic/medicina.html" id="st_bm_cat_veda_medic">Medic�na</a></li>
280
+ <li><a href="/r/veda_archeo/archeologia.html" id="st_bm_cat_veda_archeo">Archeol�gia</a></li>
281
+ <li><a href="/r/veda_zoo/zoologia.html" id="st_bm_cat_veda_zoo">Zool�gia</a></li>
282
+ <li><a href="/r/veda_geo/geologia.html" id="st_bm_cat_veda_geo">Geol�gia</a></li>
283
+ <li><a href="/r/veda_techno/technologie.html" id="st_bm_cat_veda_techno">Technol�gie</a></li>
284
+ <li><a href="/r/veda_profil/profily.html" id="st_bm_cat_veda_profil">Profily</a></li>
285
+ <li><a href="/r/veda_pfoto/press-foto.html" id="st_bm_cat_veda_pfoto">Press foto</a></li>
286
+ <li><a href="/r/veda_recenz/recenzie.html" id="st_bm_cat_veda_recenz">Recenzie</a></li>
287
+ <li style="background: url(/imgs/archiv-menu.gif) no-repeat 45px -2px; overflow: visible; height: 27px; padding-top: 3px;"><a href="http://dennik.sme.sk">DENN�K</a></li>
288
+ </ul>
289
+ </div>
290
+ <div style="text-align: center;"><div id='bmone2n-2337.47.4.64'>
291
+ <noscript><div style='display:inline'><a href='http://ad2.bbmedia.cz/please/redirect/2337/47/4/64/'><img src='http://ad2.bbmedia.cz/please/showit/2337/47/4/64/?typkodu=img' width='300' height='300' style='border-width:0' alt='' /></a></div></noscript>
292
+ </div>
293
+ </div><div id="menu-reklama-1"></div>
294
+ <div id="prilmenu">
295
+ <h5>ANKETA</h5>
296
+ <div id="stbox_ank_veda_hs_el"></div>
297
+ <script type="text/javascript">
298
+ function stbox_ank_veda_hs_set(){}
299
+ var stbox_ank_veda_hs = new stbox_ank_veda_hs_set();
300
+ addLoadEvent (function () {
301
+ storm_anketa_init(stbox_ank_veda_hs);
302
+ stbox_ank_veda_hs.ank_idg = 'veda_hs';
303
+ stbox_ank_veda_hs.ank_id = 0;
304
+ stbox_ank_veda_hs.f_show_num = 1;
305
+ stbox_ank_veda_hs.ank_max_width = 75;
306
+ stbox_ank_veda_hs.id_el = 'stbox_ank_veda_hs_el';
307
+ stbox_ank_veda_hs.obj_variable = 'stbox_ank_veda_hs';
308
+ storm_anketa_render(stbox_ank_veda_hs);
309
+ });
310
+ </script>
311
+ <div class="note"><a href="/anketa_archiv.asp?pol=veda_hs">star�ie ankety</a></div>
312
+ </div>
313
+ <div id="c1blokw">
314
+ <div class="c1blok">
315
+ <h5>PR�LOHY</h5>
316
+ <ul>
317
+ <li><a href="http://pocitace.sme.sk">Po��ta�e</a></li>
318
+ <li><a href="http://mobil.sme.sk">Mobil</a></li>
319
+ <li><a href="http://www.sme.sk/zdravie/">Zdravie</a></li>
320
+ <li><a href="http://auto.sme.sk">Auto - Moto</a></li>
321
+ <li><a href="http://bratislava.sme.sk">Bratislava</a></li>
322
+ <li><a href="http://www.sme.sk/forum/">F�rum</a></li>
323
+ <li><a href="http://www.sme.sk/kariera/">Kari�ra</a></li>
324
+ <li><a href="http://cestovanie.sme.sk">Na cest�ch</a></li>
325
+ <li><a href="http://www.sme.sk/vikend/">V�kend</a></li>
326
+ <li><a href="http://zaujimavosti.sme.sk">Zauj�mavosti</a></li>
327
+ </ul>
328
+ </div>
329
+ <div class="c1blok">
330
+ <h5>PETIT PRESS</h5>
331
+ <ul>
332
+ <li><a href="http://www.sme.sk/dok/tiraz/">Tir�</a></li>
333
+ <li><a href="http://inzercia.sme.sk/">Inzercia</a></li>
334
+ <li><a href="http://predplatne.sme.sk">Predplatn�</a></li>
335
+ <li><a href="http://www.sme.sk/kodex/">Etick� k�dex</a></li>
336
+ </ul>
337
+ </div>
338
+ </div>
339
+ </div>
340
+ </div>
341
+ <!-- END hlavny kontent -->
342
+ <!-- footer -->
343
+ <div id="footer">
344
+ <p><a href="http://www.sme.sk/mapa-stranky/">Mapa str�nky</a> | <a href="http://www.sme.sk/dok/kontakt/">kontakt</a> | <a href="http://www.sme.sk/dok/faq/">pomoc</a> | <a href="#null" onClick="st_openWindow('http://www.sme.sk/footer/homeNS.htm', 'Ako homepage', 'width=500,height=600');"><!--[if IE 5]><a href="#null" onClick="this.style.behavior='url(#default#homepage)'; this.setHomePage('http://veda.sme.sk');"><![endif]-->nastavi� ako homepage <!--[if IE 5]></a><![endif]--></a> | <a href="#" onClick="bookmark_site('www.sme.sk', 'www.sme.sk');">prida� medzi ob��ben�</a> | <a href="#null" onClick="st_openWindow('http://www.sme.sk/footer/win.htm', 'PetitPress','width=500,height=300');">&copy; Petit Press, a.s.</a></p>
345
+ </div>
346
+ <!-- END footer -->
347
+ </div>
348
+
349
+ <div id="helpbox" style="width:200px;text-align:left;visibility:hidden;background-color:#FFFFAA;border:1px solid #940400;font:12px Arial, sans-serif;padding:3px;position:absolute;color:black;"></div>
350
+ <script type="text/javascript">
351
+ <!--
352
+ main_onload();
353
+ // -->
354
+ </script>
355
+
356
+
357
+ <script type="text/javascript">
358
+ st_active_cat = 'veda_eko';
359
+ show_active_menu_categ();
360
+ </script>
361
+ <!-- BBmedia one2many code -->
362
+ <!-- HowTo: All the ad codes must be above this code! -->
363
+ <!-- HowTo: It is good idea to place this code just below the last ad code. -->
364
+ <script type='text/javascript' charset='windows-1250' src='http://ad2.bbmedia.cz/bb/bb_one2n.js?56020090925101'></script>
365
+ <script type='text/javascript'>/* <![CDATA[ */
366
+ bmone2n.addPosition('2337.47.6.20','');
367
+ bmone2n.addPosition('2337.47.2.64','');
368
+ bmone2n.addPosition('2337.47.4.64','');
369
+ bmone2n.getAd('ad2.bbmedia.cz','',''+(typeof(bburlparam)=='string'?'&'+bburlparam:''));
370
+ /* ]]> */</script>
371
+ <div id='bmone2t-2337.47.6.20' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.6.20'); /* ]]> */</script></div>
372
+ <div id='bmone2t-2337.47.2.64' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.2.64'); /* ]]> */</script></div>
373
+ <div id='bmone2t-2337.47.4.64' style='display:none'><script type='text/javascript'>/* <![CDATA[ */ bmone2n.makeAd('2337.47.4.64'); /* ]]> */</script></div>
374
+ <script type='text/javascript'>/* <![CDATA[ */ bmone2n.moveAd(); /* ]]> */</script>
375
+ <!-- Gemius SA - gemiusAudience / sme.sk -->
376
+ <script type="text/javascript">
377
+ var pp_gemius_identifier = new String('AfUw5KdgJ28KVrrUYjtEp7e9Dow8ZYLaUcGoNn881W7.J7');
378
+ </script>
379
+ <script type="text/javascript" src="http://services.sme.sk/meranie/genius/xgenius.js"></script>
380
+ <script type="text/javascript">
381
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
382
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
383
+ </script>
384
+ <script type="text/javascript">
385
+ var pageTracker = _gat._getTracker("UA-66869-23"); pageTracker._initData(); pageTracker._trackPageview();
386
+ </script>
387
+ <!-- StartK�d: go.cz.bbelements.com 3.1 Dynamick� + p�edchoz� plugin-detekce test(581) / UnikatiTEST(19) / POST(1) / nestandard(27) -->
388
+ <script type='text/javascript' charset='windows-1250'>
389
+ /* <![CDATA[ */
390
+ var bbs=screen,bbn=navigator,bbh;bbh='&ubl='+bbn.browserLanguage+'&ucc='+bbn.cpuClass+'&ucd='+bbs.colorDepth+'&uce='+bbn.cookieEnabled+'&udx='+bbs.deviceXDPI+'&udy='+bbs.deviceYDPI+'&usl='+bbn.systemLanguage+'&uje='+bbn.javaEnabled()+'&uah='+bbs.availHeight+'&uaw='+bbs.availWidth+'&ubd='+bbs.bufferDepth+'&uhe='+bbs.height+'&ulx='+bbs.logicalXDPI+'&uly='+bbs.logicalYDPI+'&use='+bbs.fontSmoothingEnabled+'&uto='+(new Date()).getTimezoneOffset()+'&uti='+(new Date()).getTime()+'&uui='+bbs.updateInterval+'&uul='+bbn.userLanguage+'&uwi='+bbs.width;
391
+ if(typeof(bburlparam)=='string') { bbh+='&'+bburlparam; }
392
+ if(typeof(bbkeywords)=='string') { bbh+='&keywords='+escape(bbkeywords); }
393
+ document.write("<scr"+"ipt charset='windows-1250' type='text/javascript' src='http://go.cz.bbelements.com/please/showit/581/19/1/27/?typkodu=non-standard"+bbh+"&alttext=0&border=0&bgcolor=FFFFFF&text=000000&link=0000FF&target=_blank&bust="+Math.random()+"'>");
394
+ document.write("<"+"\/scr"+"ipt>");
395
+ /* ]]> */
396
+ </script>
397
+ <!-- KonecK�d: go.cz.bbelements.com 3.1 Dynamick� + p�edchoz� plugin-detekce -->
398
+ </body>
399
+ </html>
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: content_scrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Gyorgy Frivolt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-13 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.10.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mocha
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.8
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: sanitize
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.2.0
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: nokogiri
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.4.1
54
+ version:
55
+ description: If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.
56
+ email: gyorgy.frivolt@gmail.com
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files:
62
+ - LICENSE
63
+ - README.rdoc
64
+ files:
65
+ - .document
66
+ - .gitignore
67
+ - LICENSE
68
+ - README.rdoc
69
+ - Rakefile
70
+ - VERSION
71
+ - config/content_scrapper.rb
72
+ - content_scrapper.gemspec
73
+ - lib/content_scrapper.rb
74
+ - lib/content_scrapper/content_mapping.rb
75
+ - lib/content_scrapper/feedzirra.rb
76
+ - rails/init.rb
77
+ - test/helper.rb
78
+ - test/test_content_mapping.rb
79
+ - test/test_content_scrapper.rb
80
+ - test/test_pages/pretty.html
81
+ - test/test_pages/twocontent.html
82
+ - test/test_pages/ugly.html
83
+ has_rdoc: true
84
+ homepage: http://github.com/fifigyuri/content_scrapper
85
+ licenses: []
86
+
87
+ post_install_message:
88
+ rdoc_options:
89
+ - --charset=UTF-8
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: "0"
97
+ version:
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: "0"
103
+ version:
104
+ requirements: []
105
+
106
+ rubyforge_project:
107
+ rubygems_version: 1.3.5
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: Gem for those who want to screen scrap only the content part of web pages, blogs or articles.
111
+ test_files:
112
+ - test/test_content_mapping.rb
113
+ - test/test_content_scrapper.rb
114
+ - test/helper.rb