html_text_gem 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/html_text_gem +7 -0
- data/lib/html_text_gem.rb +36 -0
- metadata +47 -0
data/bin/html_text_gem
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "rubygems"
|
3
|
+
require 'nokogiri'
|
4
|
+
#require 'open-uri'
|
5
|
+
|
6
|
+
# Renders an HTML node as plain text.
#
# Operates on `node.dup` rather than on `node` itself. In the copy, every
# <br> is replaced by a newline, every <hr> by a 70-dash rule on its own
# line, and a blank line is inserted after each <div>. Link targets (href
# attributes) are not included in the output.
#
# @param node [#dup] a Nokogiri document or node (the copy must respond to
#   xpath, css and text)
# @return [String] the text content of the rewritten copy
def html_to_text(node)
  block_tags = %w[div] # elements followed by a blank line
  # Replacement text per separator element. Each tag needs its own key:
  # a repeated key in a hash literal silently discards the earlier entry.
  separators = { "br" => "\n", "hr" => "\n#{'-' * 70}\n" }
  copy = node.dup

  # Rewrite each text node, replacing a '>', whitespace run, '<' sequence
  # with a single space.
  # NOTE(review): text nodes rarely contain a literal '>' ... '<' pair, so this
  # pattern may have been meant as /\s+/ -- confirm the intended whitespace
  # handling before changing it.
  copy.xpath('.//text()').each do |text_node|
    text_node.content = text_node.text.gsub(/>\s+</, " ")
  end

  # Swap each separator element for its plain-text replacement.
  copy.css(separators.keys.join(',')).each do |element|
    element.replace(separators[element.name])
  end

  # Put a blank line after each block-level element.
  copy.css(block_tags.join(',')).each { |element| element.after("\n\n") }

  copy.text
end
|
27
|
+
|
28
|
+
# Parse index.html (resolved against the current working directory) as UTF-8.
# The block form of File.open closes the file handle as soon as the block
# returns, instead of leaving it open for the life of the process.
doc = File.open('index.html') { |file| Nokogiri::HTML(file, nil, 'UTF-8') }
text = html_to_text(doc)

# Write the extracted text to snippet.txt.
File.write("snippet.txt", text)
|
34
|
+
|
35
|
+
|
36
|
+
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: html_text_gem
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Vy Nguyen
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-12-10 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A gem to create text files
|
15
|
+
email: vnguye36@gmail.com
|
16
|
+
executables:
|
17
|
+
- html_text_gem
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/html_text_gem.rb
|
22
|
+
- bin/html_text_gem
|
23
|
+
homepage: https://rubygems.org/profiles/maxdoodle
|
24
|
+
licenses: []
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 1.8.25
|
44
|
+
signing_key:
|
45
|
+
specification_version: 3
|
46
|
+
summary: Making a test gem
|
47
|
+
test_files: []
|