rgabo-readability 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
module Readability
  # Mixin that runs the bundled readability.js against a document and holds
  # the three settings handed to that script (style, size and margin).
  #
  # The host object must respond to dup, xpath, at_css, root and root=, and
  # to harmony_page (presumably supplied by Readability::Harmonizable, which
  # is included below -- confirm against that module).
  module Readable
    include Readability::Harmonizable

    attr_writer :read_style, :read_size, :read_margin

    # Returns the configured style, falling back to
    # Readability::Style::NEWSPAPER when none has been assigned.
    def read_style
      @read_style ||= Readability::Style::NEWSPAPER
    end

    # Returns the configured size, falling back to
    # Readability::Size::MEDIUM when none has been assigned.
    def read_size
      @read_size ||= Readability::Size::MEDIUM
    end

    # Returns the configured margin, falling back to
    # Readability::Margin::MEDIUM when none has been assigned.
    def read_margin
      @read_margin ||= Readability::Margin::MEDIUM
    end

    # Runs readability.js on a copy of this document.
    #
    # args - Options Hash (default: {}):
    #        :content_only - When truthy, return only the element with id
    #                        "readInner" instead of the document root.
    #
    # Returns the root of the processed copy, or the "#readInner" element
    # (nil if the copy has no such element) when :content_only is set.
    def to_readable(args = {})
      # Read the option without writing a default back into the caller's hash;
      # a missing key is nil, which is already falsy.
      content_only = args[:content_only]

      # Resolve the settings through the readers so the defaults apply even
      # when nothing was assigned. Reading the instance variables directly
      # would hand nil to the script in that case. Locals are captured up
      # front so the values do not depend on what self is inside the block.
      style  = read_style
      size   = read_size
      margin = read_margin

      # Work on a copy of the document.
      readable_doc = dup

      # Remove all script tags before the page is evaluated.
      readable_doc.xpath('//script').each { |node| node.remove }

      readable_doc.harmony_page do |page|
        # Parameters read by readability.js from the window object.
        page.window.readStyle  = style
        page.window.readSize   = size
        page.window.readMargin = margin

        # Execute readability.js, located in the js/ directory next to this file.
        page.load(File.join(File.dirname(__FILE__), 'js', 'readability.js'))
      end

      # Return <div id="readInner">...</div> if content_only was requested.
      return readable_doc.at_css("#readInner") if content_only

      readable_doc.root
    end

    # In-place variant: replaces this document's root with the result of
    # to_readable(args).
    #
    # Returns the value assigned to root.
    def to_readable!(args = {})
      self.root = to_readable(args)
    end
  end
end
data/readability.gems ADDED
@@ -0,0 +1,30 @@
1
+ # readability.gems generated gem export file. Note that any env variable settings will be missing. Append these after using a ';' field separator
2
+
3
+ # nokogiri
4
+ nokogiri -v1.4.1
5
+
6
+ # harmony (johnson & envjs)
7
+ stackdeck -v0.2.0
8
+ johnson -v2.0.0.pre3
9
+ envjs -v0.3.1
10
+ harmony -v0.5.5
11
+
12
+ # tomdoc
13
+ hoe -v2.6.0
14
+ ParseTree -v3.0.5
15
+ RubyInline -v3.7.0
16
+ ruby_parser -v2.0.4
17
+ sexp_processor -v3.0.4
18
+ colored -v1.2
19
+ tomdoc -v0.1.0
20
+
21
+ # jeweler
22
+ gemcutter -v0.5.0
23
+ git -v1.2.5
24
+ json_pure -v1.4.3
25
+ rubyforge -v2.0.4
26
+ jeweler -v1.4.0
27
+
28
+ # rspec
29
+ rspec -v1.3.0
30
+ syntax -v1.0.0
@@ -0,0 +1,72 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for rgabo-readability 0.1.0.
#
# NOTE(review): the header of this file says it is generated by jeweler, so
# the corrected description below should also be applied to the
# Jeweler::Tasks definition in the Rakefile or it will be lost on regeneration.
Gem::Specification.new do |s|
  s.name = %q{rgabo-readability}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Gabor Ratky"]
  s.date = %q{2010-05-16}
  s.description = %q{Extends Nokogiri::HTML::Document to run Arc90's Readability and produce easy to read HTML documents.}
  s.email = %q{rgabo@rgabostyle.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "example.rb",
    "lib/readability.rb",
    "lib/readability/harmonizable.rb",
    "lib/readability/js/readability.js",
    "lib/readability/readable.rb",
    "readability.gems",
    "rgabo-readability.gemspec",
    "spec/files/change_title.js",
    "spec/files/tomdoc-reasonable-ruby-documentation.html",
    "spec/readability/harmonizable_spec.rb",
    "spec/readability/readable_spec.rb",
    "spec/readability_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/rgabo/readability}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Run Arc90's Readability on Nokogiri documents}
  s.test_files = [
    "spec/readability/harmonizable_spec.rb",
    "spec/readability/readable_spec.rb",
    "spec/readability_spec.rb",
    "spec/spec_helper.rb"
  ]

  # Typed (development/runtime) dependencies are only used when the
  # specification supports specification_version AND RubyGems is at least
  # 1.2.0; otherwise every dependency is declared with plain add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
    s.add_runtime_dependency(%q<harmony>, ["= 0.5.5"])
    s.add_runtime_dependency(%q<nokogiri>, ["~> 1.4"])
  else
    s.add_dependency(%q<rspec>, [">= 1.3.0"])
    s.add_dependency(%q<harmony>, ["= 0.5.5"])
    s.add_dependency(%q<nokogiri>, ["~> 1.4"])
  end
end
72
+
@@ -0,0 +1 @@
1
+ document.title = "Oops, I did it again."
@@ -0,0 +1,123 @@
1
+ <!DOCTYPE html>
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en-us">
3
+ <head>
4
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
5
+ <title>TomDoc - Reasonable Ruby Documentation</title>
6
+ <meta name="author" content="Tom Preston-Werner" />
7
+ <link href="http://feeds.feedburner.com/tom-preston-werner" rel="alternate" title="Tom Preston-Werner" type="application/atom+xml" />
8
+
9
+ <!-- syntax highlighting CSS -->
10
+ <link rel="stylesheet" href="/css/syntax.css" type="text/css" />
11
+
12
+ <!-- Homepage CSS -->
13
+ <link rel="stylesheet" href="/css/screen.css" type="text/css" media="screen, projection" />
14
+
15
+ <!-- Typekit -->
16
+ <script type="text/javascript" src="http://use.typekit.com/jpd0pfm.js"></script>
17
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
18
+ </head>
19
+ <body>
20
+
21
+ <!-- ClickTale Top part -->
22
+ <script type="text/javascript">
23
+ var WRInitTime=(new Date()).getTime();
24
+ </script>
25
+ <!-- ClickTale end of Top part -->
26
+
27
+ <div class="site">
28
+ <div class="title">
29
+ <a href="/">Tom Preston-Werner</a>
30
+ <a class="extra" href="/">home</a>
31
+ </div>
32
+
33
+ <div id="post">
34
+ <h1>TomDoc &#8211; Reasonable Ruby Documentation</h1>
35
+ <p class="meta">11 May 2010 &#8211; San Francisco</p>
36
+ <p><a href="http://rdoc.rubyforge.org">RDoc</a> is an abomination. It&#8217;s ugly to read in plain text, requires the use of the inane :nodoc: tag to prevent private method documentation from showing up in final rendering, and does nothing to encourage complete or unambiguous documentation of classes, methods, or parameters. <a href="http://yardoc.org"><span class="caps">YARD</span></a> is much better but goes too far in the other direction (and still doesn&#8217;t look good in plain text). Providing an explicit way to specify parameters and types is great, but having to remember a bunch of strict tag names in order to be compliant is not a good way to encourage coders to write documentation. And again we see a @private tag that&#8217;s necessary to hide docs from the final render.</p>
37
+ <p>Three years ago, after suffering with these existing documentation formats for far too long, I started using my own documentation format. It looked a bit like RDoc but had a set of conventions for specifying parameters, return values, and the expected types. It used plain language and full sentences so that a human could read and understand it without having to parse machine-oriented tags or crufty markup. I called this format TomDoc, because if Linus can name stuff after himself, then why can&#8217;t I?</p>
38
+ <p>After years in the making, TomDoc is finally a well specified documentation format. You can find the full spec at <a href="http://tomdoc.org">http://tomdoc.org</a>.</p>
39
+ <p>But enough talk. Here&#8217;s a sample of what a TomDoc&#8217;d method might look like:</p>
40
+ <div class="highlight"><pre><code class="ruby"><span class="c1"># Public: Duplicate some text an abitrary number of times.</span>
41
+ <span class="c1">#</span>
42
+ <span class="c1"># text - The String to be duplicated.</span>
43
+ <span class="c1"># count - The Integer number of times to duplicate the text.</span>
44
+ <span class="c1">#</span>
45
+ <span class="c1"># Examples</span>
46
+ <span class="c1">#</span>
47
+ <span class="c1"># multiplex(&#39;Tom&#39;, 4)</span>
48
+ <span class="c1"># # =&gt; &#39;TomTomTomTom&#39;</span>
49
+ <span class="c1">#</span>
50
+ <span class="c1"># Returns the duplicated String.</span>
51
+ <span class="k">def</span> <span class="nf">multiplex</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">count</span><span class="p">)</span>
52
+ <span class="n">text</span> <span class="o">*</span> <span class="n">count</span>
53
+ <span class="k">end</span>
54
+ </code></pre>
55
+ </div><p>At first glance you&#8217;ll notice a few things. First, and most important, is that the documentation looks nice in plain text. When I&#8217;m working on a project, I need to be able to scan and read method documentation quickly. Littering the docs with tags and markup (especially <span class="caps">HTML</span> markup) is not acceptable. Code documentation should be optimized for human consumption. Second, all parameters and return values, and their expected types are specified. Types are generally denoted by class name. Because Ruby is so flexible, you are not constrained by a rigid type declaration syntax and are free to explain precisely how the expected types may vary under different circumstances. Finally, the basic layout is designed to be easy to remember. Once you commit a few simple conventions to memory, writing documentation becomes second nature, with all of the tricky decision making already done for you.</p>
56
+ <p>Today&#8217;s Ruby libraries suffer deeply from haphazard versioning schemes. Even RubyGems itself does not follow a sane or predictable versioning pattern. This lack of discipline stems from the absence of well defined Public APIs. TomDoc attempts to solve this problem by making it simple to define an unambiguous Public <span class="caps">API</span> for your library. Instead of assuming that all classes and methods are intended for public consumption, TomDoc makes the Public <span class="caps">API</span> opt-in. To denote that something is public, all you have to do is preface the main description with &#8220;Public:&#8221;. By forcing you to explicitly state that a class or method is intended for public consumption, a deliberate and thoughtful Public <span class="caps">API</span> is automatically constructed that can inform disciplined version changes according to the tenets of <a href="http://semver.org">Semantic Versioning</a>. In addition, the prominent display of &#8220;Public&#8221; in a method description ensures that developers are made aware of the sensitive nature of the method and do not carelessly change the signature of something in the Public <span class="caps">API</span>.</p>
57
+ <p>Once a Public <span class="caps">API</span> has been established, some very exciting things become possible. We&#8217;re currently working on a processing tool that will render TomDoc into various forms (terminal, <span class="caps">HTML</span>, etc). If you run this tool on a library, you&#8217;ll get a printout of the Public <span class="caps">API</span> documentation. You can publish this online so that others have easy access to it. When you roll a new version of the library, you can run the tool again, giving it a prior version as a base, and have it automatically display only the methods that have changed. This diff will be extremely useful for users while they upgrade to the new version (or so they can evaluate whether an upgrade is warrented)!</p>
58
+ <p>While I&#8217;ve been using various nascent forms of TomDoc for several years, we&#8217;re just now starting to adopt it for everything we do at GitHub. Now that I&#8217;ve formalized the spec it will be easy for the entire team to write compliant TomDoc. The goal is to have every class, method, and accessor of every GitHub library documented. In the future, once we have proper tooling, we&#8217;d even like to create a unit test that will fail if anything is missing documentation.</p>
59
+ <p>TomDoc is still a rough specification so I&#8217;m initially releasing it as 0.9.0. Over the coming months I&#8217;ll make any necessary changes to address user concerns and release a 1.0.0 version once things have stabilized. If you&#8217;d like to suggest changes, please open an issue on the <a href="http://github.com/mojombo/tomdoc">TomDoc GitHub repository</a>.</p>
60
+ </div>
61
+
62
+ <div id="related">
63
+ <h2>Related Posts</h2>
64
+ <ul class="posts">
65
+
66
+ <li><span>19 May 2009</span> &raquo; <a href="/2009/05/19/the-git-parable.html">The Git Parable</a></li>
67
+
68
+ <li><span>17 Nov 2008</span> &raquo; <a href="/2008/11/17/blogging-like-a-hacker.html">Blogging Like a Hacker</a></li>
69
+
70
+ <li><span>03 Nov 2008</span> &raquo; <a href="/2008/11/03/how-to-meet-your-next-cofounder.html">How to Meet Your Next Cofounder</a></li>
71
+
72
+ </ul>
73
+ </div>
74
+
75
+ <div class="footer">
76
+ <div class="contact">
77
+ <p>
78
+ Tom Preston-Werner<br />
79
+ Cofounder of <a href="http://github.com/">GitHub</a><br />
80
+ tom@mojombo.com
81
+ </p>
82
+ </div>
83
+ <div class="contact">
84
+ <p>
85
+ <a href="http://github.com/mojombo/">github.com/mojombo</a><br />
86
+ <a href="http://twitter.com/mojombo/">twitter.com/mojombo</a><br />
87
+ <a href="http://flickr.com/photos/mojombo/">flickr.com/photos/mojombo</a>
88
+ </p>
89
+ </div>
90
+ <div class="rss">
91
+ <a href="http://feeds.feedburner.com/tom-preston-werner">
92
+ <img src="/images/rss.png" alt="Subscribe to RSS Feed" />
93
+ </a>
94
+ </div>
95
+ </div>
96
+ </div>
97
+
98
+ <a href="http://github.com/mojombo"><img style="position: absolute; top: 0; right: 0; border: 0;" src="http://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me on GitHub" /></a>
99
+
100
+ <!-- ClickTale Bottom part -->
101
+ <div id="ClickTaleDiv" style="display: none;"></div>
102
+ <script type="text/javascript">
103
+ if(document.location.protocol!='https:')
104
+ document.write(unescape("%3Cscript%20src='http://s.clicktale.net/WRb.js'%20type='text/javascript'%3E%3C/script%3E"));
105
+ </script>
106
+ <script type="text/javascript">
107
+ if(typeof ClickTale=='function') ClickTale(206,0.3,"www03");
108
+ </script>
109
+ <!-- ClickTale end of Bottom part -->
110
+
111
+ <!-- Google Analytics -->
112
+ <script type="text/javascript">
113
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
114
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
115
+ </script>
116
+ <script type="text/javascript">
117
+ var pageTracker = _gat._getTracker("UA-6016902-1");
118
+ pageTracker._trackPageview();
119
+ </script>
120
+ <!-- Google Analytics end -->
121
+
122
+ </body>
123
+ </html>
@@ -0,0 +1,36 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'open-uri'
3
+
4
# Specs for the DOM access added by Readability::Harmonizable, run against
# the TomDoc blog post fixture.
describe Readability::Harmonizable do
  before :each do
    # Parse the fixture inside a block-form File.open so the file handle is
    # closed as soon as parsing finishes instead of being left open.
    fixture = File.dirname(__FILE__) + '/../files/tomdoc-reasonable-ruby-documentation.html'
    @doc = File.open(fixture) { |io| Nokogiri::HTML(io) }
  end

  it "extends Nokogiri::HTML::Document" do
    Nokogiri::HTML::Document.include?(Readability::Harmonizable).should be_true
  end

  it "allows access to the DOM" do
    @doc.window.should_not be_nil
    @doc.window.document.should_not be_nil
  end

  it "allows changes to the DOM" do
    @doc.window do |window|
      window.document.title = "foobar"
    end

    # The change made inside the block must be visible on a later access.
    @doc.window.document.title.should == "foobar"
  end

  it "executes javascript code on the document" do
    # check original title
    @doc.window.document.title.should == "TomDoc - Reasonable Ruby Documentation"

    # set new title
    @doc.execute_js("document.title = 'foobar'")

    # document.title should have new title
    @doc.window.document.title.should == "foobar"
  end
end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
# Specs for Readability::Readable: the read_* settings and the
# to_readable / to_readable! conversions, run against the TomDoc blog post
# fixture.
describe Readability::Readable do
  before :each do
    # Parse the fixture inside a block-form File.open so the file handle is
    # closed as soon as parsing finishes instead of being left open.
    fixture = File.dirname(__FILE__) + '/../files/tomdoc-reasonable-ruby-documentation.html'
    @doc = File.open(fixture) { |io| Nokogiri::HTML(io) }
  end

  it "extends Nokogiri::HTML::Document" do
    Nokogiri::HTML::Document.include?(Readability::Readable).should be_true
  end

  it "can set Readability variables" do
    @doc.read_style = Readability::Style::NEWSPAPER
    @doc.read_size = Readability::Size::MEDIUM
    @doc.read_margin = Readability::Margin::MEDIUM

    @doc.read_style.should == Readability::Style::NEWSPAPER
    @doc.read_size.should == Readability::Size::MEDIUM
    @doc.read_margin.should == Readability::Margin::MEDIUM
  end

  it "can run Readability and return resulting document" do
    # original document includes link to flickr
    @doc.to_html.should include "flickr.com/photos/mojombo"

    readable_doc = @doc.to_readable

    # readable version should not include the link to flickr
    readable_doc.should_not be_nil
    readable_doc.to_html.should_not include "flickr.com/photos/mojombo"
  end

  it "can run Readability in place" do
    # original document includes link to flickr
    @doc.to_html.should include "flickr.com/photos/mojombo"

    @doc.to_readable!

    # readable version should not include the link to flickr
    @doc.to_html.should_not include "flickr.com/photos/mojombo"
  end

  it "can return the content only" do
    # the full result is expected to contain the "Original Page" text ...
    content = @doc.to_readable(:content_only => false)
    content.to_html.should include "Original Page"

    # ... while the content-only result is expected to leave it out
    content = @doc.to_readable(:content_only => true)
    content.to_html.should_not include "Original Page"
  end
end
@@ -0,0 +1,4 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Readability do
4
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'readability'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rgabo-readability
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Gabor Ratky
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-16 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 27
30
+ segments:
31
+ - 1
32
+ - 3
33
+ - 0
34
+ version: 1.3.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: harmony
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - "="
44
+ - !ruby/object:Gem::Version
45
+ hash: 1
46
+ segments:
47
+ - 0
48
+ - 5
49
+ - 5
50
+ version: 0.5.5
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: nokogiri
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ hash: 7
62
+ segments:
63
+ - 1
64
+ - 4
65
+ version: "1.4"
66
+ type: :runtime
67
+ version_requirements: *id003
68
+ description: Extends Nokogiri::HTML::Document to run Arc90's Readability and produce easy to read HTML documents.
69
+ email: rgabo@rgabostyle.com
70
+ executables: []
71
+
72
+ extensions: []
73
+
74
+ extra_rdoc_files:
75
+ - LICENSE
76
+ - README.rdoc
77
+ files:
78
+ - .document
79
+ - .gitignore
80
+ - LICENSE
81
+ - README.rdoc
82
+ - Rakefile
83
+ - VERSION
84
+ - example.rb
85
+ - lib/readability.rb
86
+ - lib/readability/harmonizable.rb
87
+ - lib/readability/js/readability.js
88
+ - lib/readability/readable.rb
89
+ - readability.gems
90
+ - rgabo-readability.gemspec
91
+ - spec/files/change_title.js
92
+ - spec/files/tomdoc-reasonable-ruby-documentation.html
93
+ - spec/readability/harmonizable_spec.rb
94
+ - spec/readability/readable_spec.rb
95
+ - spec/readability_spec.rb
96
+ - spec/spec.opts
97
+ - spec/spec_helper.rb
98
+ has_rdoc: true
99
+ homepage: http://github.com/rgabo/readability
100
+ licenses: []
101
+
102
+ post_install_message:
103
+ rdoc_options:
104
+ - --charset=UTF-8
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ hash: 3
113
+ segments:
114
+ - 0
115
+ version: "0"
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ hash: 3
122
+ segments:
123
+ - 0
124
+ version: "0"
125
+ requirements: []
126
+
127
+ rubyforge_project:
128
+ rubygems_version: 1.3.7
129
+ signing_key:
130
+ specification_version: 3
131
+ summary: Run Arc90's Readability on Nokogiri documents
132
+ test_files:
133
+ - spec/readability/harmonizable_spec.rb
134
+ - spec/readability/readable_spec.rb
135
+ - spec/readability_spec.rb
136
+ - spec/spec_helper.rb