webpager 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/webpager.rb +61 -0
  3. metadata +59 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ac6e8597fd5e840e97fa9a51bffa71fd00d1296
4
+ data.tar.gz: 9280892f178aa0a1f332f09fbb351cf11b8bde71
5
+ SHA512:
6
+ metadata.gz: 1bdf1fd3866c7446dc240e047df5a0c04f224c818b3a78518615dceed167d7d87da04db1fa40b9d92286db5e52f95871d081880e9a1203588b30edbb64fd2d62
7
+ data.tar.gz: ace5d5056c86f679c8aba5eefbfa076996ff8bb8a6c0eea1b008f3f71efb1db26ede1f836a5950d38ea4cbe7b8bdcc0357e6390e66935ba9f0e38c1ab446a01a
@@ -0,0 +1,61 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
# Fetches a web page and exposes its raw HTML, its parsed document, the
# visible text of its body, its title and a short excerpt.
#
# Relies on the file-level `require 'nokogiri'` and `require 'open-uri'`.
#
# Usage:
#   page = Webpager.new('http://seanbehan.com')
#   page.html
#   page.text
#   page.excerpt
class Webpager
  # Elements (and comments) whose contents are never visible text. They are
  # removed as a whole, contents included, before generic tag stripping.
  HIDDEN_BLOCKS = Regexp.union(
    %r{<script\b[^>]*>.*?</script\s*>}im,
    %r{<style\b[^>]*>.*?</style\s*>}im,
    %r{<iframe\b[^>]*>.*?</iframe\s*>}im,
    /<!--.*?-->/m
  ).freeze

  # Opening <a ...> / <img ...> tags only. The word boundary keeps <abbr>,
  # <article> etc. from matching, and [^>]* stops at the end of the tag.
  REF_TAG = /<(?:a|img)\b[^>]*>/i.freeze

  # Any remaining opening or closing tag.
  ANY_TAG = %r{</?[^>]+>}.freeze

  # Matches the document title, tolerating attributes, upper-case tag names
  # and titles that span several lines.
  TITLE_TAG = %r{<title\b[^>]*>(.*?)</title\s*>}im.freeze

  # @param url [String] address of the page; surrounding whitespace is
  #   ignored when the page is fetched
  def initialize(url)
    @url = url
  end

  # Downloads the page once and memoizes the response body.
  #
  # The URL is parsed first and opened through open-uri's URI#open rather
  # than Kernel#open, so "| command" strings and local file paths are never
  # executed or read, and the stream is closed as soon as it has been read.
  #
  # @return [String] raw response body
  # @raise [URI::InvalidURIError] if the URL cannot be parsed
  # @raise [ArgumentError] if the URL scheme cannot be opened by open-uri
  def html
    @html ||= begin
      uri = URI.parse(@url.strip)
      # Only URI classes extended by open-uri expose a public #open.
      raise ArgumentError, "unsupported URL: #{@url.inspect}" unless uri.respond_to?(:open)

      uri.open(&:read)
    end
  end

  # @return [Object] the Nokogiri document parsed from #html (memoized)
  def doc
    @doc ||= Nokogiri::HTML(html)
  end

  # Visible text of the page body, one non-empty stripped line per line.
  #
  # Script, style, iframe blocks and comments are dropped with their
  # contents. <a> and <img> tags are replaced by the URLs found inside them
  # (space separated, so a URL never fuses with the adjacent text). Every
  # other tag is removed and its inner text kept.
  #
  # @return [String]
  def text
    visible = body.gsub(HIDDEN_BLOCKS, '')

    with_links = visible.gsub(REF_TAG) do |tag|
      links = URI.extract(tag)
      links.empty? ? '' : " #{links.join(' ')} "
    end

    with_links
      .gsub(ANY_TAG, '')
      .split("\n")
      .map(&:strip)
      .reject(&:empty?)
      .join("\n")
  end

  # @return [String] inner HTML of the document's <body>
  def body
    doc.xpath('//body').inner_html
  end

  # A text qualifies as an excerpt when splitting it on '.' yields at least
  # two pieces and it is longer than 100 characters.
  #
  # @param text [String]
  # @return [Boolean]
  def excerptable?(text = '')
    text.split('.').size >= 2 && text.size > 100
  end

  # Contents of the first <title> element in the raw HTML, with surrounding
  # whitespace removed.
  #
  # @return [String, nil] nil when the page has no title element
  def title
    found = html.match(TITLE_TAG)
    found && found[1].strip
  end

  # Not implemented yet; always returns nil.
  # TODO: look up the <link> element that references the favicon.
  def favicon
    nil
  end

  # Content of the first paragraph that is #excerptable?, stripped.
  #
  # @return [String] empty string when no paragraph qualifies
  def excerpt
    paragraph = doc.xpath('//p').map(&:content).find { |content| excerptable?(content) }
    (paragraph || '').strip
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webpager
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sean Behan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ description: Write a gem description
28
+ email:
29
+ - inbox@seanbehan.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - webpager.rb
35
+ homepage:
36
+ licenses:
37
+ - MIT
38
+ metadata: {}
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - .
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 2.2.1
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: Write a gem summary
59
+ test_files: []