Html2Feedbooks 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +13 -0
- data/bin/html2fb.rb +63 -0
- data/confs/conf.yaml +25 -0
- data/lib/app.rb +93 -0
- data/lib/conf.rb +25 -0
- data/lib/document.rb +71 -0
- data/lib/downloader.rb +24 -0
- data/lib/feedbooks.rb +128 -0
- data/lib/parser.rb +335 -0
- metadata +102 -0
data/README
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
HTML2Feedbooks
|
2
|
+
==============
|
3
|
+
|
4
|
+
A script to automate basic publishing work on Feedbooks.com.
|
5
|
+
|
6
|
+
Usage
|
7
|
+
-----
|
8
|
+
|
9
|
+
./html2fb URL.html
|
10
|
+
|
11
|
+
You can change some settings by creating your own configuration file and passing it with the -c option:
|
12
|
+
|
13
|
+
html2fb -c myconf.yaml URL.html
|
data/bin/html2fb.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'conf.rb'
|
5
|
+
require 'downloader.rb'
|
6
|
+
require 'document.rb'
|
7
|
+
require 'parser.rb'
|
8
|
+
require 'feedbooks.rb'
|
9
|
+
require 'tmpdir'
|
10
|
+
require 'launchy'
|
11
|
+
|
12
|
+
include HTML2FB

# Command-line entry point: validate the URL, download and parse the page,
# then either open a local HTML preview (default) or send it to Feedbooks (-s).

# Defaults; each one can be overridden from the command line.
options = {
  :conf => 'conf.yaml', # configuration file name handed to Conf
  :preview => true,     # write a preview file instead of sending
  :conv => true         # let the parser convert the source charset
}

# "-nc" is not a single-letter switch, so OptionParser cannot declare it
# reliably. Keep accepting that spelling by translating it to the long form.
ARGV.map! { |arg| arg == '-nc' ? '--no-conv' : arg }

OptionParser.new do |opts|
  opts.banner = "Usage: html2fb [options] URL"

  opts.on("-c", "--conf FILE", String, "Configuration file") do |f|
    options[:conf] = f
  end
  # The short flag used to be declared twice; a long alias replaces the duplicate.
  opts.on("-s", "--send", "Send to feedbooks") do |f|
    options[:preview] = !f
  end
  # "--[no-]conv" yields false for --no-conv and true for --conv.
  opts.on("--[no-]conv", "Charset conversion (--no-conv or -nc disables it)") do |f|
    options[:conv] = f
  end
end.parse!

# Keep asking until Downloader accepts the entry.
url = nil
entry = ARGV[0]
loop do
  begin
    url = Downloader.valid_url?(entry)
    break
  rescue StandardError => e
    # StandardError only: Ctrl-C (Interrupt) must still abort the program.
    unless entry.nil? || entry == ''
      STDERR.puts 'Invalid URL'
      puts e
    end
  end
  print "URL : "
  STDOUT.flush
  line = STDIN.gets
  abort 'No URL given' if line.nil? # stdin closed: nothing left to ask
  entry = line.strip
end

conf = Conf.new(options[:conf], options[:conv])
content = Downloader.download(url)
doc = Parser.new(conf).parse(content)
puts doc.toc.to_yaml

if options[:preview]
  # The preview file name is derived from the URL, so re-running overwrites it.
  page = File.join(Dir.tmpdir, Digest::MD5.hexdigest(url.to_s)) + '.html'
  File.open(page, 'w') { |f| f.write doc.to_html }
  puts "A preview of the parsed file should be opening in your webbrowser now"
  puts "If nothing open you can open the file located at : #{page}"
  puts "When happy with the parsed output rerun with -s option to send to Feedbooks.com"
  Launchy::Browser.run(page)
else
  doc.to_feedbooks(conf)
end
|
data/confs/conf.yaml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
remove:
|
2
|
+
class:
|
3
|
+
- totoc
|
4
|
+
- pagenum
|
5
|
+
- totoi
|
6
|
+
- img
|
7
|
+
- pg
|
8
|
+
expr:
|
9
|
+
- 'table'
|
10
|
+
- //pre
|
11
|
+
- hr
|
12
|
+
|
13
|
+
select:
|
14
|
+
- expr: h2
|
15
|
+
fblevel: Part
|
16
|
+
select:
|
17
|
+
- expr: h3
|
18
|
+
fblevel: Chapter
|
19
|
+
|
20
|
+
fb:
|
21
|
+
user: #ask#
|
22
|
+
bookid: #ask#
|
23
|
+
booktype: #ask#
|
24
|
+
pass: #ask#
|
25
|
+
host: 'feedbooks.com'
|
data/lib/app.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'time'
|
5
|
+
require 'htmlentities'
|
6
|
+
=begin
|
7
|
+
def colour(text, colour_code)
|
8
|
+
"#{colour_code}#{text}\e[0m"
|
9
|
+
end
|
10
|
+
def green(text); colour(text, "\e[32m"); end
|
11
|
+
def red(text); colour(text, "\e[31m"); end
|
12
|
+
def yellow(text); colour(text, "\e[33m"); end
|
13
|
+
def blue(text); colour(text, "\e[34m"); end
|
14
|
+
=end
|
15
|
+
|
16
|
+
# Builds a single Atom <entry> from its accessors and POSTs it to +addr+,
# using HTTP basic authentication when +user+ is set.
#
# Accessors:
# title::   entry title (HTML fragment, run through decode_text)
# content:: entry body (HTML fragment, run through recode_text)
# date::    object responding to #xmlschema, used for <updated>
# author::  text placed in <author><name>
# addr::    URL the entry is posted to
# user::    basic-auth user name (no authentication when nil)
# pass::    basic-auth password
# type::    value of the <category> label/term (element omitted when nil)
class AtomPost
  attr_accessor :title, :content, :date, :author, :addr, :user, :pass, :type

  HTMLENCODER = HTMLEntities.new

  # addrs:: optional target URL; left unset when nil.
  def initialize(addrs = nil)
    self.addr = addrs unless addrs.nil?
  end

  # Fetches +entry_url+ and returns the path of the first entry's
  # link[@rel="down"] href, or nil when the response has no such entry/link.
  def down_url(entry_url)
    url = URI.parse(entry_url)
    Net::HTTP.start(url.host, url.port) do |http|
      # request_uri keeps the query string and never yields an empty path.
      req = Net::HTTP::Get.new(url.request_uri)
      req.basic_auth user, pass unless user.nil?
      response = http.request(req)
      entry = Hpricot(response.body).at('//entry')
      link = entry && entry.at('link[@rel="down"]')
      link && URI.parse(link[:href]).path
    end
  end

  # POSTs the entry to +addr+.
  #
  # Returns the response's Location header (nil when absent) on success or
  # redirection; any other response is raised through Net::HTTPResponse#error!.
  # Raises StandardError when +addr+ is not set.
  #
  # NOTE: this method shadows Object#send for AtomPost instances; the name is
  # kept because FBPost.push calls it.
  def send
    raise StandardError, 'Missing Address' if addr.nil?
    url = URI.parse(addr)
    req = Net::HTTP::Post.new(url.request_uri)
    req.basic_auth user, pass unless user.nil?
    req.body = entry_xml
    req.set_content_type('application/atom+xml;type=entry')

    res = Net::HTTP.new(url.host, url.port).start { |http| http.request(req) }
    case res
    when Net::HTTPSuccess, Net::HTTPRedirection
      res['location']
    else
      res.error!
    end
  end

  # Returns +txt+ with named entities inside text nodes rewritten as decimal
  # entities. Blank input is returned untouched (blank? comes from the core
  # extensions defined in parser.rb).
  def recode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text do |t|
      t.content = force_decimal_entities(t.content) if t.content.match(/&[a-z][a-z0-9]+;/i)
    end
    m.to_html
  end

  # Decodes every entity in +txt+ and re-encodes it in decimal form.
  def force_decimal_entities(txt)
    HTMLENCODER.encode(HTMLENCODER.decode(txt), :decimal)
  end

  # Round-trips +txt+ through Hpricot and returns the resulting HTML.
  #
  # NOTE(review): the decoded text is computed but never written back to the
  # node, so entities are NOT actually decoded. Left as is because assigning
  # the decoded text could put a raw "&" or "<" into the Atom XML.
  def decode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text { |t| HTMLENCODER.decode(t.content) }
    m.to_html
  end

  private

  # Serialises the entry as an Atom XML document, one element per line.
  # Interpolation keeps a nil title/content/author from raising TypeError.
  # NOTE(review): author and type are inserted without XML escaping.
  def entry_xml
    lines = [
      '<?xml version="1.0"?>',
      '<entry xmlns="http://www.w3.org/2005/Atom">',
      "<title>#{decode_text(title)}</title>",
      "<id>#{Digest::MD5.hexdigest(title.to_s + content.to_s)}</id>",
      "<updated>#{date.xmlschema}</updated>",
      "<author><name>#{author}</name></author>",
      "<content>#{recode_text(content)}</content>"
    ]
    lines << %(<category label="#{type}" term="#{type}" />) unless type.nil?
    lines << '</entry>'
    lines.join("\n") + "\n"
  end
end
|
data/lib/conf.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module HTML2FB
  # Read-only view over the YAML configuration file.
  class Conf
    # Loads the first usable configuration file.
    #
    # file:: file name or path; tried as given, then relative to "./", to this
    #        file's directory and to its sibling "confs" directory
    # conv:: value stored under the 'conv' key of the loaded configuration
    #
    # Raises ArgumentError when no candidate can be loaded; previously the
    # object was left without data and failed later with NoMethodError on nil.
    def initialize(file, conv)
      here = File.dirname(__FILE__)
      ['', './', "#{here}/", "#{here}/../confs/"].each do |dir|
        path = dir + file
        next unless File.file?(path) && File.readable?(path)

        begin
          loaded = File.open(path, 'r') { |io| YAML.load(io) }
        rescue StandardError => e
          # Interpolation: an exception cannot be concatenated to a String.
          STDERR.puts("unreadable conf : #{path}\n#{e}")
          next
        end

        unless loaded.is_a?(Hash)
          # e.g. an empty file: nothing to index into, try the next candidate.
          STDERR.puts("unreadable conf : #{path}\nnot a YAML mapping")
          next
        end

        puts "loaded config file : " + path
        @conf = loaded
        @conf['conv'] = conv
        return
      end
      raise ArgumentError, "no readable configuration file found for '#{file}'"
    end

    # Returns the value stored under the top-level key +x+ (nil when absent).
    def [](x)
      @conf[x]
    end
  end
end
|
data/lib/document.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
module HTML2FB
  # A titled node of the document tree. +content+ holds nested Section and
  # Text objects in document order.
  class Section
    attr_accessor :title, :content, :fblevel

    def initialize
      @content = []
    end

    # Concatenated HTML of every child.
    def to_html
      content.map { |child| child.to_html }.join
    end

    # The title prefixed with "[fblevel] " when a level is set. A nil title
    # is rendered as an empty string in that case instead of raising TypeError.
    def decorated_title
      fblevel.nil? ? title : "[#{fblevel}] #{title}"
    end

    # Nested outline: [decorated title, [outline of each child]].
    def titles
      [decorated_title, content.map { |child| child.titles }]
    end

    def to_s
      "title :#{title} \n" + content.map { |child| child.to_s }.join("\n\n")
    end
  end

  # Root of the tree.
  class Document < Section
    # Outline of the top-level children (the document's own title is left out).
    def toc
      content.map { |child| child.titles }
    end
  end

  # Leaf node wrapping a raw HTML string.
  class Text
    attr_accessor :content

    def initialize(c = '')
      @content = c
    end

    def to_html
      @content
    end

    def to_s
      @content
    end

    # Placeholder entry so text nodes show up in the outline.
    def titles
      ['#text']
    end
  end
end
|
data/lib/downloader.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module HTML2FB
  # Fetches the source page, from a URL or from a local path.
  class Downloader
    # Parses +entry+ and checks that it can be opened.
    #
    # Returns the parsed URI. Raises whatever URI.parse or the open call
    # raises when the entry is malformed or unreachable.
    def self.valid_url?(entry)
      uri = URI.parse(entry)
      open_source(uri) { |_io| nil } # opened only to prove it is reachable
      uri
    end

    # Returns the whole content of +uri+ (a URI object or a string).
    def self.download(uri)
      print "Downloading "
      puts uri.to_s
      open_source(uri) { |io| io.read }
    end

    # Opens +uri+, yields the IO to the block and closes it afterwards.
    # URIs that open-uri extends (they respond to #open) are fetched through
    # it; anything else is treated as a local file path. Kernel#open is
    # avoided: it stopped opening URLs in Ruby 3.0 and left handles unclosed.
    def self.open_source(uri, &block)
      uri = URI.parse(uri.to_s) unless uri.is_a?(URI::Generic)
      if uri.respond_to?(:open)
        uri.open(&block)
      else
        File.open(uri.to_s, 'r', &block)
      end
    end
    private_class_method :open_source
  end
end
|
data/lib/feedbooks.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'app.rb'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'digest/md5'
|
4
|
+
|
5
|
+
module HTML2FB

  # Feedbooks credentials and target book, kept as a process-wide singleton
  # that FBPost reads through FBSession.session.
  class FBSession
    attr_accessor :bookid, :booktype, :user, :host
    attr_reader :pass

    @@fbsession = nil

    # Reads the 'fb' section of +conf+, prompting (through the top-level +ask+
    # helper) for every value that is missing or set to '#ask#'.
    #
    # Raises StandardError when a session already exists; the original built
    # that exception without raising it.
    def initialize(conf)
      raise StandardError, 'Already in session' unless @@fbsession.nil?
      fb = conf['fb'] || {}
      self.bookid = ask(fb['bookid'], "Book Id")
      self.booktype = ask(fb['booktype'], "Book Type")
      self.user = ask(fb['user'], "User")
      self.pass = ask(fb['pass'], "Pass")
      self.host = fb['host'] || 'feedbooks.com'
      # Registered last so a failed prompt does not leave a half-built session.
      @@fbsession = self
    end

    # The current session, or nil when none has been created.
    def self.session
      @@fbsession
    end

    # Stores the MD5 hex digest of the password. A value that already looks
    # like such a digest (exactly 32 lowercase hex characters) is stored as
    # is. The previous check stripped other characters before counting, so
    # some plain passwords were mistaken for digests.
    def pass=(pas)
      @pass = pas =~ /\A[a-f0-9]{32}\z/ ? pas : Digest::MD5.hexdigest(pas)
    end
  end

  class Document
    # Opens a session from +conf+ and sends every top-level child.
    def to_feedbooks(conf)
      FBSession.new(conf)
      content.each do |e|
        e.to_feedbooks(conf, nil)
      end
    end
  end

  class FBPost
    # Posts one item to Feedbooks.
    #
    # conf:: unused here; kept for interface compatibility
    # tit::  item title
    # cont:: item content
    # type:: Atom category of the item
    # path:: server path to post under; the book's contents.atom when nil
    #
    # Returns what AtomPost#down_url gives for the Location returned by the
    # post, or nil when the post returned no Location.
    def self.push(conf, tit, cont, type, path = nil)
      puts "Sending to feedbooks #{tit} with type #{type}"
      fb = FBSession.session
      target = if path.nil?
                 "http://#{fb.host}/#{fb.booktype}/#{fb.bookid}/contents.atom"
               else
                 "http://#{fb.host}#{path}"
               end
      post = AtomPost.new(target)

      post.content = cont
      post.user = fb.user
      post.pass = fb.pass
      post.date = Time.now
      post.author = fb.user
      post.title = tit
      post.type = type
      location = post.send
      post.down_url(location) unless location.nil?
    end
  end

  class Section
    # Current nesting depth, shared by to_feedbooks and to_html recursion.
    @@level = 0
    @@types = ['Part', 'Chapter', 'Section']

    # Posts this section (with empty content), then its children under the
    # path returned for it. The Feedbooks type is +fblevel+ when it names a
    # known type; otherwise it is picked from the nesting depth.
    def to_feedbooks(conf, path = nil)
      type = fblevel.to_s.downcase.strip.capitalize
      type = @@types[@@level] || @@types[-1] unless @@types.include?(type)
      fbpath = FBPost.push(conf, title, '', type, path)
      @@level += 1
      begin
        content.each { |e| e.to_feedbooks(conf, fbpath) }
      ensure
        # Restore the depth even when a child fails.
        @@level -= 1
      end
    end

    alias :old_to_html :to_html

    # HTML of the section: a heading whose rank follows the nesting depth,
    # then the children. A nil title (e.g. a Document without <title>) yields
    # an empty heading instead of raising TypeError.
    def to_html
      heading = "<h#{@@level + 1}>#{title}</h#{@@level + 1}>"
      @@level += 1
      begin
        heading + old_to_html
      ensure
        @@level -= 1
      end
    end
  end

  class Text
    # Posts the text as a "Text" item under +path+. Whitespace-only text is
    # skipped. Every element name is prefixed with "xhtml:" before sending.
    def to_feedbooks(conf, path = nil)
      stxt = to_html
      return unless stxt.strip.size > 0
      doc = Hpricot('<div xmlns:xhtml="http://www.w3.org/1999/xhtml">' + stxt + '</div>')
      doc.traverse_all_element do |e|
        unless e.is_a?(Hpricot::Text)
          e.name = 'xhtml:' + e.name
          # The closing tag is renamed too when the node exposes one.
          e.etag = 'xhtml:' + e.etag unless (!e.respond_to?(:etag)) || e.etag.nil?
        end
      end
      FBPost.push(conf, '', doc.to_html, "Text", path)
    end
  end
end
|
119
|
+
|
120
|
+
# Returns +txt+ unless it is nil or the '#ask#' placeholder; in that case
# prompts on standard output with +disp+ and reads lines from STDIN until a
# non-empty answer is given.
#
# Raises EOFError (from STDIN.readline) when input ends before an answer.
def ask(txt, disp = 'Prompt')
  return txt unless txt.nil? || txt == '#ask#'
  loop do
    print disp + ' : '
    $stdout.flush # the prompt has no newline; make sure it is displayed
    answer = STDIN.readline.strip
    return answer unless answer.empty?
  end
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,335 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'document.rb'
|
3
|
+
require 'progressbar'
|
4
|
+
#require 'ruby-prof'
|
5
|
+
#require 'term/ansicolor'
|
6
|
+
#include Term::ANSIColor
|
7
|
+
|
8
|
+
module HTML2FB
  # Turns an HTML page into a Document tree, driven by the configuration's
  # 'remove' and 'select' sections.
  class Parser

    # conf:: object answering [] for 'conv', 'remove' and 'select'
    def initialize(conf)
      @conf = conf
    end

    # Parses the HTML string +txt+ and returns the resulting Document.
    # When conf['conv'] is set and the page declares a charset in a
    # Content-Type meta tag, the source is converted to UTF-8 first.
    def parse(txt)
      puts "Parsing HTML"
      pdoc = Hpricot(txt)
      if @conf['conv']
        tc = declared_charset(pdoc)
        unless tc.nil?
          puts "Trying to convert source encoding from #{tc} to utf-8"
          pdoc = Hpricot(to_utf8(txt, tc))
        end
      end
      doc = Document.new
      puts "Removing garbage elements"
      remove_objs(pdoc)
      ti = pdoc.at('title')
      doc.title = ti.extract_text.strip unless ti.nil?

      puts "Building TOC"
      parse_text(pdoc, doc)

      doc
    end

    protected

    # Charset named by the first meta[@http-equiv="Content-Type"] element,
    # or nil when there is no such element or it names no charset. A meta
    # tag without a content attribute is treated as naming none.
    def declared_charset(pdoc)
      metas = pdoc / 'meta[@http-equiv="Content-Type"]'
      return nil unless metas.size > 0
      charset = metas.first.attributes['content'].to_s.split(';').find do |s|
        s.strip[0, 7] == 'charset'
      end
      charset.nil? ? nil : charset.split('=').last.strip
    end

    # Returns +txt+ converted from +charset+ to UTF-8. Uses String#encode
    # where available; Iconv (no longer shipped with Ruby) is only loaded on
    # interpreters whose strings cannot transcode themselves.
    def to_utf8(txt, charset)
      if txt.respond_to?(:encode)
        txt.dup.force_encoding(charset).encode('utf-8')
      else
        require 'iconv'
        Iconv.conv('utf-8', charset.downcase, txt)
      end
    end

    # Deletes from +doc+ everything the 'remove' configuration names:
    # class::   elements carrying one of these CSS classes
    # expr::    elements matching these expressions
    # before::  the first match of each expression and its preceding siblings
    # between:: what doc.between returns for each [first, last] pair
    # after::   the first match of each expression and its following siblings
    def remove_objs(doc)
      rules = @conf['remove']
      return unless rules

      (rules['class'] || []).each { |cl| doc.search('.' + cl).remove }
      (rules['expr'] || []).each { |cl| doc.search(cl).remove }
      (rules['before'] || []).each do |cl|
        x = doc.at(cl)
        if x
          x.preceding.remove
          x.parent.children.delete(x)
        end
      end
      (rules['between'] || []).each do |cl|
        t = doc.between(cl.first, cl.last)
        t.remove unless t.nil?
      end
      (rules['after'] || []).each do |cl|
        x = doc.at(cl)
        if x
          x.following.remove
          x.parent.children.delete(x)
        end
      end
    end

    # Feeds every node of +doc+ to a state machine built from the 'select'
    # configuration; the machine fills +ret+ with sections and text.
    def parse_text(doc, ret)
      aut = build_autom(@conf['select'], ret)

      pbar = ProgressBar.new("Parsing", doc.search('//').size)
      doc.traverse_all_element do |el|
        aut.feed(el)
        pbar.inc
      end
      pbar.finish
      aut.finish(doc)
    end

    # Builds a StateMachine with one level per nesting depth of +conf_tab+
    # and resets it onto +doc+.
    def build_autom(conf_tab, doc)
      mach = StateMachine.new
      build_rec(mach, conf_tab)
      mach.reset(doc)
      mach
    end

    # Adds the entries of +conf_tab+ (minus their nested 'select') as one
    # level, then recurses on all the nested 'select' lists together.
    # A missing list (nil) is treated like an empty one.
    def build_rec(mach, conf_tab)
      return if conf_tab.nil? || conf_tab.size < 1
      exprs = conf_tab.collect { |e| e.reject { |k, _v| k == 'select' } }
      mach.add_level(exprs)
      build_rec(mach, conf_tab.collect { |e| e['select'] }.flatten.compact)
    end
  end

  # Incrementally builds the section tree while nodes are fed in document
  # order. @starts[n] is the section currently open at level n (index 0 is
  # the document itself); @content is the expression locating the node after
  # which the not-yet-stored text begins.
  class StateMachine

    def initialize
      @levels = []
      @current_level = 0
      @starts = []
      @done = []
      @max_level = 0
      @content = nil
    end

    # Appends one level; +tab+ is the list of {'expr', 'fblevel'} hashes that
    # open a section at that level.
    def add_level(tab)
      tab = [tab] unless tab.is_a?(Array)
      @levels.push tab
      @current_level += 1
    end

    # Starts a run: +doc+ becomes the root and text collection starts at 'body'.
    def reset(doc)
      @current_level = 0
      @max_level = @levels.size
      @starts[0] = doc
      @content = 'body'
    end

    def inspect
      @levels.inspect + "\n" + @current_level.to_s + "\n\n" + @done.inspect
    end

    def create_fbsection(title, fblevel)
      s = Section.new
      s.fblevel = fblevel
      s.title = title
      s
    end

    def create_textNode(txt)
      Text.new(txt)
    end

    # Stores the text following the last opened section, closes every open
    # section and returns the root. The trailing text is skipped when the
    # anchor cannot be found in +doc+ (this used to raise NoMethodError).
    def finish(doc)
      unless @content.nil?
        anchor = doc.at(@content)
        unless anchor.nil?
          t = create_textNode(anchor.following.to_html)
          @starts[@current_level].content.push(t)
        end
      end
      (1..@max_level).to_a.reverse.each do |l|
        close_section(l)
      end
      @starts[0]
    end

    # Opens a section at level +lvl+ for element +el+.
    # obj:: {:xpath => path of the heading element, :fblevel => Feedbooks level}
    #
    # The nodes between the previous anchor and +el+ are first stored as text
    # in the currently open section; sections at +lvl+ and deeper are closed.
    def open_section(obj, lvl, el)
      if @content == 'body'
        tmp = el.preceding[0..-1]
      else
        tmp = el.root.search(@content...(el.xpath))[1..-1]
      end
      # Slicing may give nil or a plain Array (Ruby 3 no longer returns the
      # subclass); only Hpricot::Elements answers blank? and to_html.
      tmp = Hpricot::Elements[*tmp]
      if tmp.blank? # search can't find between siblings
        tmp = el.root.deep_between(@content, (el.xpath))
      end
      unless tmp.blank?
        tmph = tmp.to_html
        unless tmph.blank?
          t = create_textNode(tmph)
          @starts[@current_level].content.push(t)
        end
      end
      (lvl..@max_level).to_a.reverse.each do |l|
        close_section(l)
      end
      @starts[lvl] = create_fbsection(el.root.at(obj[:xpath]).extract_text, obj[:fblevel])
      @content = obj[:xpath]
      @current_level = lvl
    end

    # Attaches the section open at +lvl+ to the nearest open ancestor and
    # clears the slot. No-op when nothing is open at that level.
    def close_section(lvl)
      return if @starts[lvl].nil?
      llvl = lvl - 1
      llvl = llvl - 1 until !@starts[llvl].nil?
      @starts[llvl].content.push @starts[lvl]
      @starts[lvl] = nil
    end

    # Processes one node: text nodes are ignored; an element opens a section
    # at every level that has an expression matching it (first match per level).
    def feed(el)
      return if el.is_a?(Hpricot::Text)
      # One empty list per level; "[[]*n]" always produced [[]].
      @done = Array.new(@levels.size) { [] }

      @levels.each_with_index do |lvl, i|
        lvl.each do |expr|
          if el.in_search?(expr['expr'])
            open_section({ :xpath => el.xpath, :fblevel => expr['fblevel'] }, i + 1, el)
            break
          end
        end
      end
    end
  end
end
|
230
|
+
|
231
|
+
|
232
|
+
class String
  # True when the string is empty or contains only whitespace.
  def blank?
    self !~ /\S/
  end
end
|
237
|
+
|
238
|
+
class NilClass
  # nil is always blank, so callers can test a value without a nil check.
  def blank?
    true
  end
end
|
243
|
+
|
244
|
+
# Extra traversal helpers mixed into Hpricot nodes.
module Hpricot::Traverse
  # True when this node is matched by +expr+.
  # A bare lowercase tag name is compared to the node name directly; any
  # other expression is searched from the root when it starts with "/" and
  # from the parent otherwise. The original tested expr[0..1] (two
  # characters) against "/", so the root branch was never taken for "//x".
  def in_search?(expr)
    if expr !~ /[^a-z0-9]/
      return self.name.downcase == expr.downcase
    end

    se_in = expr[0, 1] == '/' ? root : parent
    se_in.search(expr).each do |el|
      return true if el == self
    end
    false
  end

  # Topmost ancestor of this node (memoised in @root).
  def root
    return @root unless @root.nil?
    se_in = self
    se_in = se_in.parent until se_in.parent.nil?
    @root = se_in
    se_in
  end

  # Result of searching the root with the range a..b.
  def between(a, b)
    root.search(a..b)
  end

  # Concatenated content of every text node at or below this node.
  def extract_text
    t = ''
    traverse_all_element do |e|
      t << e.content.to_s if e.is_a?(Hpricot::Text)
    end
    t
  end

  # Nodes located after the node matching +i+ and, when +j+ matches
  # something, before the node matching +j+. Unlike a sibling search this
  # also crosses ancestor boundaries.
  #
  # Returns an empty Hpricot::Elements when +i+ matches nothing (the original
  # raised NoMethodError there). The start node itself is excluded: the
  # original wrote "prec.include?el || el==tm", which Ruby parses as
  # include?(el || el==tm), so the "el == tm" test never ran.
  # NOTE(review): verify against real documents that excluding the start
  # node does not change what select_end trims.
  def deep_between(i, j)
    start = at(i)
    return Hpricot::Elements[] if start.nil?

    stop = j.nil? ? nil : at(j)
    if stop.nil?
      r = start.deep_following
    else
      prec = start.deep_preceding
      kept = stop.deep_preceding.reject { |el| prec.include?(el) || el == start }
      r = Hpricot::Elements[*kept]
    end
    Hpricot::Elements[*select_end(r, i)]
  end

  # Trims +tab+ so it starts after the node located by +expr+: the first
  # entry of +tab+ whose search finds +expr+ (rewritten relative to that
  # entry's own xpath) is replaced by its children that come after the node,
  # or by a recursive trim of its children when the node lies deeper; the
  # entries after it are kept. When no entry contains the node, +tab+ is
  # returned whole.
  def select_end(tab, expr)
    tail = []
    idx = -1
    tab.each_with_index do |e, i|
      next unless e.search(expr.gsub(e.xpath, '.')).size > 0

      idx = i
      if e.children.find { |ee| ee.xpath == expr }
        seen = false
        e.children.each do |ee|
          tail << ee if seen
          seen = true if ee.xpath == expr
        end
      else
        tail = select_end(e.children, expr)
      end
      break
    end
    tail + tab[(idx + 1)..-1]
  end

  # Preceding siblings of this node and of each of its ancestors, outermost
  # ancestor first.
  def deep_preceding
    ret = Hpricot::Elements[]
    ret += parent.deep_preceding if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    ret += preceding
    Hpricot::Elements[*ret]
  end

  # Following siblings of this node, then those of each ancestor, innermost
  # ancestor first.
  def deep_following
    ret = following
    ret += parent.deep_following if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    Hpricot::Elements[*ret]
  end

end
|
332
|
+
|
333
|
+
class Hpricot::Elements
  # An element list is blank when it is empty, matching the blank? helpers
  # added to String and NilClass.
  alias_method :blank?, :empty?
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: Html2Feedbooks
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benoit Larroque
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-04-28 00:00:00 +02:00
|
13
|
+
default_executable: html2fb.rb
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.1
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: htmlentities
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "4.0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: launchy
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0.3"
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: progressbar
|
47
|
+
type: :runtime
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.0.3
|
54
|
+
version:
|
55
|
+
description: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
|
56
|
+
email: zeta dot ben at gmail dot com
|
57
|
+
executables:
|
58
|
+
- html2fb.rb
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- README
|
65
|
+
- confs/conf.yaml
|
66
|
+
- lib/app.rb
|
67
|
+
- lib/conf.rb
|
68
|
+
- lib/document.rb
|
69
|
+
- lib/downloader.rb
|
70
|
+
- lib/feedbooks.rb
|
71
|
+
- bin/html2fb.rb
|
72
|
+
- lib/parser.rb
|
73
|
+
has_rdoc: true
|
74
|
+
homepage: http://github.com/Html2Feedbooks
|
75
|
+
licenses: []
|
76
|
+
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: "0"
|
87
|
+
version:
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
version:
|
94
|
+
requirements: []
|
95
|
+
|
96
|
+
rubyforge_project:
|
97
|
+
rubygems_version: 1.3.5
|
98
|
+
signing_key:
|
99
|
+
specification_version: 3
|
100
|
+
summary: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
|
101
|
+
test_files: []
|
102
|
+
|