html_text_gem 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/html_text_gem +7 -0
  2. data/lib/html_text_gem.rb +36 -0
  3. metadata +47 -0
data/bin/html_text_gem ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require "nokogiri"
5
+ #require 'open-uri'
6
+ require "html_text_gem"
7
+
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+ require "rubygems"
3
+ require 'nokogiri'
4
+ #require 'open-uri'
5
+
6
# Converts an HTML node into plain text.
#
# Works on a copy of +node+, so the caller's document is left untouched.
# Every <br> becomes a newline, every <hr> becomes a 70-dash rule on its own
# line, and every <div> is followed by a blank line. Links are not extracted:
# only the text content of the modified copy is returned.
#
# @param node [#dup] a Nokogiri document or node (anything whose copy answers
#   +xpath+, +css+ and +text+ the way Nokogiri nodes do)
# @return [String] text content of the modified copy
def html_to_text(node)
  blocks = %w[div] # elements that get a blank line after them
  # Replacement text keyed by element name. Each name must appear only once:
  # a repeated key silently overwrites the earlier entry.
  separator = { 'br' => "\n", 'hr' => "\n#{'-' * 70}\n" }
  copy = node.dup # never mutate the caller's node

  # Replace whitespace sitting between a literal ">" and "<" inside text nodes.
  # NOTE(review): text nodes rarely contain literal angle brackets, so this may
  # have been meant to collapse all whitespace (/\s+/) -- confirm before changing.
  copy.xpath('.//text()').each { |t| t.content = t.text.gsub(/>\s+</, ' ') }

  # Swap each separator element for its plain-text stand-in.
  copy.css(separator.keys.join(',')).each { |n| n.replace(separator[n.name]) }

  # Put a blank line after each block-level element.
  copy.css(blocks.join(',')).each { |n| n.after("\n\n") }

  copy.text
end
27
+
28
# Script body: runs when this file is loaded. Parses index.html from the
# current working directory as UTF-8 HTML, converts it to plain text and
# stores the result in snippet.txt.
# The block form of File.open closes the file handle once parsing is done.
doc = File.open('index.html') { |file| Nokogiri::HTML(file, nil, 'UTF-8') }
parse = html_to_text(doc)
#puts html_to_text(doc)

# write to text file
File.write("snippet.txt", parse)
34
+
35
+
36
+
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html_text_gem
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Vy Nguyen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-12-10 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A gem to create text files
15
+ email: vnguye36@gmail.com
16
+ executables:
17
+ - html_text_gem
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/html_text_gem.rb
22
+ - bin/html_text_gem
23
+ homepage: https://rubygems.org/profiles/maxdoodle
24
+ licenses: []
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 1.8.25
44
+ signing_key:
45
+ specification_version: 3
46
+ summary: Making a test gem
47
+ test_files: []