webpager 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/webpager.rb +61 -0
  3. metadata +59 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ac6e8597fd5e840e97fa9a51bffa71fd00d1296
4
+ data.tar.gz: 9280892f178aa0a1f332f09fbb351cf11b8bde71
5
+ SHA512:
6
+ metadata.gz: 1bdf1fd3866c7446dc240e047df5a0c04f224c818b3a78518615dceed167d7d87da04db1fa40b9d92286db5e52f95871d081880e9a1203588b30edbb64fd2d62
7
+ data.tar.gz: ace5d5056c86f679c8aba5eefbfa076996ff8bb8a6c0eea1b008f3f71efb1db26ede1f836a5950d38ea4cbe7b8bdcc0357e6390e66935ba9f0e38c1ab446a01a
@@ -0,0 +1,61 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ # page = Webpager.new('http://seanbehan.com')
5
+ # page.html
6
+ # page.text
7
+ # page.excerpt
8
# Fetches a web page and exposes its raw HTML, a plain-text rendering,
# its title and a short excerpt.
#
#   page = Webpager.new('http://seanbehan.com')
#   page.html
#   page.text
#   page.excerpt
class Webpager
  # Elements whose *content* (not just the tags) must be dropped from the
  # plain-text rendering. Each pattern is lazy and multi-line so that it stops
  # at its own closing tag and may span several lines.
  SCRIPT_BLOCK  = %r{<script\b[^>]*>.*?</script>}im
  STYLE_BLOCK   = %r{<style\b[^>]*>.*?</style>}im
  IFRAME_BLOCK  = %r{<iframe\b[^>]*>.*?</iframe>}im
  COMMENT_BLOCK = /<!--.*?-->/m
  NON_CONTENT   = Regexp.union(SCRIPT_BLOCK, STYLE_BLOCK, IFRAME_BLOCK, COMMENT_BLOCK)

  # Opening <a ...> / <img ...> tags only. `\b` keeps <abbr>, <article>, ...
  # from matching, and `[^>]*` stops at the end of the tag itself.
  REF_TAG = /<(?:a|img)\b[^>]*>/i

  # Any remaining opening or closing tag.
  ANY_TAG = %r{</?[^>]+>}

  # <title> element, tolerant of attributes, case and line breaks.
  TITLE_TAG = %r{<title\b[^>]*>(.*?)</title>}im

  # @param url [String] absolute URL of the page to load (fetched lazily)
  def initialize(url)
    @url = url
  end

  # Raw HTML of the page. Downloaded on first access and memoized.
  #
  # @return [String]
  # @raise [ArgumentError] if the URL scheme cannot be fetched
  def html
    @html ||= fetch_page(@url.strip)
  end

  # Parsed document, memoized.
  #
  # @return [Nokogiri::HTML::Document]
  def doc
    @doc ||= Nokogiri::HTML(html)
  end

  # Plain-text rendering of the page body: scripts, styles, iframes and
  # comments are removed entirely, <a>/<img> tags are replaced by the URLs
  # they reference, all other tags are stripped and blank lines are dropped.
  #
  # @return [String] newline-separated, whitespace-trimmed lines
  def text
    body
      .gsub(NON_CONTENT, '') # must run before ANY_TAG, which would only strip the tags
      .gsub(REF_TAG) { |tag| referenced_urls(tag) }
      .gsub(ANY_TAG, '')
      .split("\n")
      .map(&:strip)
      .reject(&:empty?) # String#blank? is ActiveSupport-only
      .join("\n")
  end

  # @return [String] inner HTML of the <body> element
  def body
    doc.xpath('//body').inner_html
  end

  # Whether +text+ is substantial enough to serve as an excerpt:
  # at least two '.'-separated segments and more than 100 characters.
  #
  # @param text [String]
  # @return [Boolean]
  def excerptable?(text = '')
    text.split('.').size >= 2 && text.size > 100
  end

  # Contents of the first <title> element, whitespace-trimmed.
  #
  # @return [String, nil] nil when the page has no <title>
  def title
    raw = html[TITLE_TAG, 1]
    raw && raw.strip
  end

  # Not implemented yet; always returns nil.
  # TODO: look up the <link rel="icon"> element of the document.
  #
  # @return [nil]
  def favicon
    # doc.xpath('//link/').select { |link| link.value =~ /favi/ }
  end

  # Text of the first paragraph that qualifies as an excerpt.
  #
  # @return [String] empty string when no paragraph is excerptable
  def excerpt
    doc.xpath('//p').map(&:content).find { |content| excerptable?(content) }.to_s.strip
  end

  private

  # Downloads +url+ through open-uri. Kernel#open is deliberately avoided:
  # it no longer accepts URLs on Ruby 3+ and would execute "|cmd" strings or
  # read local files.
  def fetch_page(url)
    uri = URI.parse(url)
    raise ArgumentError, "unsupported URL: #{url.inspect}" unless uri.respond_to?(:read)

    uri.read
  end

  # URLs found inside a single tag, space-separated and padded so they do not
  # fuse with the surrounding text; empty string when the tag has none.
  def referenced_urls(tag)
    links = URI.extract(tag)
    links.any? ? " #{links.join(' ')} " : ''
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webpager
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sean Behan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ description: Write a gem description
28
+ email:
29
+ - inbox@seanbehan.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - webpager.rb
35
+ homepage:
36
+ licenses:
37
+ - MIT
38
+ metadata: {}
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - .
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 2.2.1
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: Write a gem summary
59
+ test_files: []