Html2Feedbooks 1.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/README +13 -0
- data/bin/html2fb.rb +63 -0
- data/confs/conf.yaml +25 -0
- data/lib/app.rb +93 -0
- data/lib/conf.rb +25 -0
- data/lib/document.rb +71 -0
- data/lib/downloader.rb +24 -0
- data/lib/feedbooks.rb +128 -0
- data/lib/parser.rb +335 -0
- metadata +102 -0
data/README
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
HTML2Feedbooks
|
2
|
+
==============
|
3
|
+
|
4
|
+
A script to automate basic publishing work on Feedbooks.com.
|
5
|
+
|
6
|
+
Usage
|
7
|
+
-----
|
8
|
+
|
9
|
+
./html2fb URL.html
|
10
|
+
|
11
|
+
You can change some settings by creating your own configuration file and passing it with the -c option:
|
12
|
+
|
13
|
+
html2fb -c myconf.yaml URL.html
|
data/bin/html2fb.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/ruby
# Command-line entry point of Html2Feedbooks.
#
# Flow: parse the options, obtain a URL that Downloader accepts (asking on
# STDIN if needed), download and parse the page, print the table of contents,
# then either write an HTML preview and open it in the browser (default) or
# send the document to Feedbooks when -s/--send is given.
require 'optparse'
require 'open-uri'
require 'digest/md5' # Digest::MD5 is used below to name the preview file
require 'conf.rb'
require 'downloader.rb'
require 'document.rb'
require 'parser.rb'
require 'feedbooks.rb'
require 'tmpdir'
require 'launchy'

include HTML2FB

options = {}
options[:conf] = "conf.yaml"
options[:preview] = true
options[:conv] = true
OptionParser.new do |opts|
  opts.banner = "Usage: html2fb [options] URL"

  opts.on("-c", "--conf FILE", String, "Configuration file") do |f|
    options[:conf] = f
  end
  # The short switch used to be declared twice ("-s", "-s"); give it a long form.
  opts.on("-s", "--send", "Send to feedbooks") do
    options[:preview] = false
  end
  # "-nc" is not a valid single-letter switch, and negating the value yielded
  # for "--no-conv" never produced false. Set the option explicitly whenever
  # the switch is present.
  opts.on("-n", "--no-conv", "No charset conversion") do
    options[:conv] = false
  end
end.parse!

# Ask until Downloader.valid_url? accepts the entry.
url = nil
entry = ARGV[0]
while url.nil?
  if entry.nil? || entry == ''
    print "URL : "
    line = STDIN.gets
    # gets returns nil on EOF; stop instead of failing with a backtrace.
    abort 'No URL given' if line.nil?
    entry = line.strip
    next
  end
  begin
    url = Downloader.valid_url?(entry)
  rescue StandardError => e
    # StandardError only: Interrupt (Ctrl-C) and SystemExit must not be
    # swallowed by the retry loop.
    STDERR.puts 'Invalid URL'
    puts e
    entry = nil
  end
end

conf = Conf.new(options[:conf], options[:conv])
content = Downloader.download(url)
doc = Parser.new(conf).parse(content)
puts doc.toc.to_yaml
if options[:preview]
  page = File.join(Dir.tmpdir, Digest::MD5.hexdigest(url.to_s)) + '.html'
  # Block form closes the file even if to_html raises.
  File.open(page, 'w') { |f| f.write doc.to_html }
  puts "A preview of the parsed file should be opening in your webbrowser now"
  puts "If nothing open you can open the file located at : #{page}"
  puts "When happy with the parsed output rerun with -s option to send to Feedbooks.com"
  Launchy::Browser.run(page)
else
  doc.to_feedbooks(conf)
end
|
data/confs/conf.yaml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
remove:
|
2
|
+
class:
|
3
|
+
- totoc
|
4
|
+
- pagenum
|
5
|
+
- totoi
|
6
|
+
- img
|
7
|
+
- pg
|
8
|
+
expr:
|
9
|
+
- 'table'
|
10
|
+
- //pre
|
11
|
+
- hr
|
12
|
+
|
13
|
+
select:
|
14
|
+
- expr: h2
|
15
|
+
fblevel: Part
|
16
|
+
select:
|
17
|
+
- expr: h3
|
18
|
+
fblevel: Chapter
|
19
|
+
|
20
|
+
fb:
|
21
|
+
user: #ask#
|
22
|
+
bookid: #ask#
|
23
|
+
booktype: #ask#
|
24
|
+
pass: #ask#
|
25
|
+
host: 'feedbooks.com'
|
data/lib/app.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'time'
|
5
|
+
require 'htmlentities'
|
6
|
+
=begin
|
7
|
+
def colour(text, colour_code)
|
8
|
+
"#{colour_code}#{text}\e[0m"
|
9
|
+
end
|
10
|
+
def green(text); colour(text, "\e[32m"); end
|
11
|
+
def red(text); colour(text, "\e[31m"); end
|
12
|
+
def yellow(text); colour(text, "\e[33m"); end
|
13
|
+
def blue(text); colour(text, "\e[34m"); end
|
14
|
+
=end
|
15
|
+
|
16
|
+
# Builds an Atom entry from its attributes and POSTs it to +addr+.
#
# NOTE(review): this class calls Hpricot(...) and String#blank?, neither of
# which is required or defined in this file; presumably they come from
# feedbooks.rb / parser.rb being loaded alongside it. Confirm before using
# AtomPost on its own.
class AtomPost
  attr_accessor :title, :content, :date, :author, :addr, :user, :pass, :type

  # addrs:: URL the entry is posted to (optional, can be set later via addr=).
  def initialize(addrs=nil)
    self.addr = addrs unless addrs.nil?
  end

  # Fetches +entry_url+ (with basic auth when +user+ is set) and returns the
  # path of the href of the first entry's link[@rel="down"].
  # Returns nil when the response has no entry or no such link.
  def down_url(entry_url)
    url = URI.parse(entry_url)
    Net::HTTP.start(url.host, url.port) do |http|
      req = Net::HTTP::Get.new(url.path)
      req.basic_auth user, pass unless user.nil?
      response = http.request(req)
      # Guard both lookups: the original called .at on a possibly-nil entry.
      entry = Hpricot(response.body).at('//entry')
      link = entry && entry.at('link[@rel="down"]')
      link && URI.parse(link[:href]).path
    end
  end

  # POSTs the entry to +addr+.
  # Returns the response's 'location' header (nil when absent) on success or
  # redirection; otherwise raises through Net::HTTPResponse#error!.
  # Raises StandardError when no address is set.
  #
  # NOTE(review): this method shadows Object#send for AtomPost instances; the
  # name is kept because callers use it.
  def send
    raise StandardError.new('Missing Address') if addr.nil?
    url = URI.parse(addr)
    req = Net::HTTP::Post.new(url.path)
    req.basic_auth user, pass unless user.nil?
    req.body = entry_xml
    req.set_content_type('application/atom+xml;type=entry')

    res = Net::HTTP.new(url.host, url.port).start { |http| http.request(req) }
    case res
    when Net::HTTPSuccess, Net::HTTPRedirection
      res['location']
    else
      res.error!
    end
  end

  # Returns +txt+ re-serialised with every text node that contains a named
  # entity rewritten to decimal entities. Blank input is returned as is.
  def recode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text { |t| t.content = force_decimal_entities(t.content) if t.content.match(/&[a-z][a-z0-9]+;/i) }
    m.to_html
  end

  HTMLENCODER = HTMLEntities.new

  # Decodes every entity in +txt+ and re-encodes the result as decimal entities.
  def force_decimal_entities(txt)
    HTMLENCODER.encode(HTMLENCODER.decode(txt), :decimal)
  end

  # Returns +txt+ with the entities of its text nodes decoded, then re-encoded
  # with the encoder's default instruction set.
  # The original block computed the decoded value and discarded it, so the
  # method had no effect. Re-encoding after decoding is intended to keep the
  # result usable inside the XML <title> element.
  def decode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text { |t| t.content = HTMLENCODER.encode(HTMLENCODER.decode(t.content)) }
    m.to_html
  end

  private

  # Serialises the entry as the Atom XML request body.
  # title and content go through to_s so that a nil value does not raise a
  # TypeError during concatenation.
  def entry_xml
    lines = []
    lines << '<?xml version="1.0"?>'
    lines << '<entry xmlns="http://www.w3.org/2005/Atom">'
    lines << '<title>' + decode_text(title).to_s + '</title>'
    lines << '<id>' + Digest::MD5.hexdigest(title.to_s + content.to_s) + '</id>'
    lines << '<updated>' + date.xmlschema + '</updated>'
    lines << '<author><name>' + author + '</name></author>'
    lines << '<content>' + recode_text(content).to_s + '</content>'
    lines << '<category label="' + type + '" term="' + type + '" />' unless type.nil?
    lines << '</entry>'
    lines.join("\n") + "\n"
  end

end
|
data/lib/conf.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module HTML2FB
  # Read-only view over the YAML configuration file.
  class Conf
    # Looks for +file+ as given, then under "./", this file's directory and
    # its "../confs/" sibling, and loads the first readable candidate.
    # The +conv+ flag is stored under the 'conv' key of the loaded settings.
    #
    # file:: configuration file name or path
    # conv:: value exposed as conf['conv']
    #
    # A candidate that cannot be loaded is reported on STDERR and the search
    # continues. When nothing can be loaded a warning is printed and only
    # 'conv' is available, so that #[] returns nil for other keys instead of
    # failing on a nil settings object.
    def initialize(file, conv)
      here = File.dirname(__FILE__)
      ['', './', "#{here}/", "#{here}/../confs/"].each do |prefix|
        path = prefix + file
        # File.exist? replaces File.exists?, which newer Rubies removed.
        next unless File.exist?(path) && File.readable?(path)
        begin
          loaded = File.open(path, 'r') { |io| YAML.load(io) }
          loaded['conv'] = conv
          @conf = loaded
          puts "loaded config file : " + path
          return
        rescue StandardError => e
          # Interpolate the exception: String + Exception raises TypeError.
          STDERR.puts("unreadable conf : #{path}\n#{e}")
        end
      end
      STDERR.puts("no readable conf found for : #{file}")
      @conf = { 'conv' => conv }
    end

    # Returns the setting stored under key +x+ (nil when absent).
    def [](x)
      @conf[x]
    end
  end
end
|
data/lib/document.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
module HTML2FB

  # A titled node of the document tree. +content+ holds child nodes; every
  # child is expected to answer to_html, to_s and titles.
  class Section
    attr_accessor :title, :content, :fblevel

    def initialize
      @content = []
    end

    # Concatenated HTML of all children, in order.
    def to_html
      content.map { |child| child.to_html }.join
    end

    # The title, prefixed with "[fblevel] " when a level is set.
    def decorated_title
      return title if fblevel.nil?
      "[#{fblevel}] " + title
    end

    # Two-element array: the decorated title, then the titles of each child.
    def titles
      nested = content.map { |child| child.titles }
      [decorated_title, nested]
    end

    # Debug rendering: a title line followed by the children's to_s output,
    # separated by blank lines.
    def to_s
      body = content.map { |child| child.to_s }.join("\n\n")
      "title :#{title} \n" + body
    end
  end

  # Root of the tree.
  class Document < Section
    # The titles structure of every top-level child.
    def toc
      content.map { |child| child.titles }
    end

  end

  # Leaf node wrapping a chunk of markup.
  class Text
    attr_accessor :content

    def initialize(c='')
      @content = c
    end

    # The wrapped markup, unchanged.
    def to_html
      content
    end

    # Same as to_html.
    def to_s
      content
    end

    # Placeholder entry used in the table of contents.
    def titles
      ['#text']
    end
  end
end
|
data/lib/downloader.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module HTML2FB
  # Opens and reads the source document, which may be a URL or a local path.
  class Downloader
    # Parses +entry+ and opens it once to check that it is reachable.
    # Returns the parsed URI. Raises whatever URI.parse or the open attempt
    # raises when the entry is unusable.
    def self.valid_url?(entry)
      uri = URI.parse(entry)
      # Block form so the handle is closed again (the original leaked it).
      open_source(uri) { |io| io }
      uri
    end

    # Prints a progress line and returns the whole content of +uri+.
    def self.download(uri)
      print "Downloading "
      puts uri.to_s
      open_source(uri) { |io| io.read }
    end

    # Opens +source+ and yields the IO to the block.
    # Objects that expose #open themselves are opened through it (open-uri
    # adds that method to its supported URI classes). A string carrying a
    # scheme is parsed first. Anything else is treated as a local file path.
    # Kernel.open is deliberately avoided: it no longer opens URLs on Ruby 3
    # and would execute a string that starts with "|" as a command.
    def self.open_source(source, &block)
      return source.open(&block) if source.respond_to?(:open)
      location = source.to_s
      if location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
        URI.parse(location).open(&block)
      else
        File.open(location, 'r', &block)
      end
    end
    private_class_method :open_source
  end
end
|
data/lib/feedbooks.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'app.rb'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'digest/md5'
|
4
|
+
|
5
|
+
module HTML2FB
|
6
|
+
|
7
|
+
# Holds the Feedbooks credentials and target book for the current run.
# Only one session may be created; it is reachable through FBSession.session.
class FBSession

  attr_accessor :bookid, :booktype, :user, :host
  # The writer is defined by hand below, so only the reader is generated here.
  attr_reader :pass

  @@fbsession = nil

  # Builds the session from conf['fb']; each of bookid, booktype, user and
  # pass goes through ask, which prompts when the configured value is missing.
  # host falls back to 'feedbooks.com'.
  #
  # Raises StandardError when a session already exists. The original built
  # the exception object but never raised it.
  def initialize(conf)
    raise StandardError, 'Already in session' unless @@fbsession.nil?
    fb = conf['fb']
    self.bookid = ask(fb['bookid'], "Book Id")
    self.booktype = ask(fb['booktype'], "Book Type")
    self.user = ask(fb['user'], "User")
    self.pass = ask(fb['pass'], "Pass")
    self.host = fb['host'] || 'feedbooks.com'
    # Register last, so a failed prompt does not leave a half-built session
    # blocking the next attempt.
    @@fbsession = self
  end

  # The session created by the last successful FBSession.new, or nil.
  def self.session
    @@fbsession
  end

  # Stores the password as an MD5 hex digest.
  # A value containing exactly 32 characters from [a-z0-9] (other characters
  # ignored) is taken to be a digest already and stored unchanged.
  # NOTE(review): that test also matches some plain passwords — confirm that
  # the heuristic is acceptable.
  def pass=(pas)
    if pas.gsub(/[^a-z0-9]/, '').size == 32
      @pass = pas
    else
      @pass = Digest::MD5.hexdigest(pas)
    end
  end
end
|
39
|
+
|
40
|
+
|
41
|
+
class Document
  # Opens the Feedbooks session from +conf+, then sends every top-level child
  # with no parent path.
  def to_feedbooks(conf)
    FBSession.new(conf)
    content.each { |child| child.to_feedbooks(conf, nil) }
  end
end
|
53
|
+
|
54
|
+
class FBPost
  # Posts one entry through AtomPost using the current FBSession.
  #
  # conf:: accepted for symmetry with the callers; not read here
  # tit::  entry title
  # cont:: entry content
  # type:: entry category
  # path:: path to post under; the book's contents.atom when nil
  #
  # Returns what AtomPost#down_url gives for the location returned by the
  # post, or nil when the post returned no location.
  def self.push(conf,tit,cont,type,path=nil)
    puts "Sending to feedbooks #{tit} with type #{type}"
    fb = FBSession.session
    target =
      if path.nil?
        "http://#{fb.host}/#{fb.booktype}/#{fb.bookid}/contents.atom"
      else
        "http://#{fb.host}#{path}"
      end
    post = AtomPost.new(target)

    post.content = cont
    post.user = fb.user
    post.pass = fb.pass
    post.date = Time.now
    post.author = fb.user
    post.title = tit
    post.type = type
    location = post.send
    location.nil? ? nil : post.down_url(location)
  end
end
|
75
|
+
|
76
|
+
class Section
  # Current nesting depth, shared by every Section while a tree is walked.
  @@level=0
  @@types=['Part','Chapter','Section']

  # Posts this section (with empty content) and then its children under the
  # path returned for it. The category is the capitalised fblevel when that is
  # one of @@types, otherwise the type for the current depth, falling back to
  # the last type.
  def to_feedbooks(conf,path=nil)
    kind = fblevel.to_s.downcase.strip.capitalize
    kind = @@types[@@level] || @@types[-1] unless @@types.include?(kind)
    fbpath = FBPost.push(conf, title, '', kind, path)
    @@level += 1
    content.each { |child| child.to_feedbooks(conf, fbpath) }
    @@level -= 1
  end

  alias :old_to_html :to_html

  # Children HTML (the previous to_html) preceded by the title wrapped in a
  # heading whose number is the current depth + 1.
  def to_html
    depth = @@level + 1
    html = "<h#{depth}>" + title + "</h#{depth}>"
    @@level += 1
    html += old_to_html
    @@level -= 1
    html
  end
end
|
103
|
+
|
104
|
+
class Text
  # Posts this chunk as a "Text" entry under +path+; blank chunks are skipped.
  # The markup is wrapped in a div declaring the xhtml namespace, and every
  # non-text node gets its name (and closing tag, when it has one) prefixed
  # with "xhtml:".
  def to_feedbooks(conf,path=nil)
    markup = to_html
    return if markup.strip.empty?
    wrapped = Hpricot('<div xmlns:xhtml="http://www.w3.org/1999/xhtml">' + markup + '</div>')
    wrapped.traverse_all_element do |node|
      next if node.is_a?(Hpricot::Text)
      node.name = 'xhtml:' + node.name
      node.etag = 'xhtml:' + node.etag if node.respond_to?(:etag) && !node.etag.nil?
    end
    FBPost.push(conf, '', wrapped.to_html, "Text", path)
  end
end
|
118
|
+
end
|
119
|
+
|
120
|
+
# Returns +txt+ unless it is nil or the '#ask#' placeholder; in that case
# prompts on STDOUT with +disp+ and reads STDIN until a non-empty line is
# entered, returning that line stripped.
def ask(txt,disp='Prompt')
  return txt unless txt.nil? || txt == '#ask#'
  loop do
    print disp + ' : '
    answer = STDIN.readline.strip
    return answer unless answer.size == 0
  end
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,335 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'document.rb'
|
3
|
+
require 'progressbar'
|
4
|
+
#require 'ruby-prof'
|
5
|
+
#require 'term/ansicolor'
|
6
|
+
#include Term::ANSIColor
|
7
|
+
|
8
|
+
module HTML2FB
|
9
|
+
# Turns an HTML string into a Document tree according to the configuration.
class Parser

  # conf:: settings object read with []; the keys used here are 'conv',
  #        'remove' and 'select'.
  def initialize(conf)
    @conf = conf
  end

  # Parses +txt+ and returns the resulting Document.
  # Steps: optional conversion to utf-8 based on the page's Content-Type meta
  # tag, removal of the configured elements, title extraction, then section
  # building.
  def parse(txt)
    puts "Parsing HTML"
    pdoc = Hpricot(txt)
    if @conf['conv']
      tc = declared_charset(pdoc)
      unless tc.nil?
        puts "Trying to convert source encoding from #{tc} to utf-8"
        pdoc = Hpricot(to_utf8(txt, tc))
      end
    end
    doc = Document.new
    puts "Removing garbage elements"
    remove_objs(pdoc)
    ti = pdoc.at('title')
    doc.title = ti.extract_text.strip unless ti.nil?

    puts "Building TOC"
    parse_text(pdoc, doc)

    doc
  end

  protected

  # Charset named in the first meta[@http-equiv="Content-Type"] element, or
  # nil when there is no such element, no content attribute or no charset part.
  def declared_charset(pdoc)
    metas = pdoc / 'meta[@http-equiv="Content-Type"]'
    return nil unless metas.size > 0
    content = metas.first.attributes['content']
    return nil if content.nil? # the original called split on nil here
    part = content.split(';').find { |s| s.strip[0, 7] == 'charset' }
    part.nil? ? nil : part.split('=').last.strip
  end

  # Returns +txt+ converted from +charset+ to utf-8.
  # String#encode is used when the running Ruby provides it. The iconv
  # library, which the original required unconditionally, is no longer
  # shipped with Ruby >= 2.0, so it is only the fallback.
  def to_utf8(txt, charset)
    if txt.respond_to?(:encode)
      txt.encode('utf-8', charset)
    else
      require 'iconv'
      Iconv.conv('utf-8', charset.downcase, txt)
    end
  end

  # Applies the 'remove' rules to +doc+: elements by class, by expression,
  # everything before / after an anchor (the anchor included), and ranges
  # between two expressions. Missing rule lists are skipped.
  def remove_objs(doc)
    rules = @conf['remove']
    return unless rules

    (rules['class'] || []).each { |cl| doc.search('.' + cl).remove }
    (rules['expr'] || []).each { |expr| doc.search(expr).remove }

    (rules['before'] || []).each do |expr|
      anchor = doc.at(expr)
      next unless anchor
      anchor.preceding.remove
      anchor.parent.children.delete(anchor)
    end

    (rules['between'] || []).each do |pair|
      span = doc.between(pair.first, pair.last)
      span.remove unless span.nil?
    end

    (rules['after'] || []).each do |expr|
      anchor = doc.at(expr)
      next unless anchor
      anchor.following.remove
      anchor.parent.children.delete(anchor)
    end
  end

  # Feeds every node of +doc+ to a state machine built from the 'select'
  # rules, showing a progress bar, and lets the machine finish into +ret+.
  def parse_text(doc, ret)
    aut = build_autom(@conf['select'], ret)

    pbar = ProgressBar.new("Parsing", doc.search('//').size)
    doc.traverse_all_element do |el|
      aut.feed(el)
      pbar.inc
    end
    pbar.finish
    aut.finish(doc)
  end

  # Builds a StateMachine with one level per nesting depth of +conf_tab+ and
  # resets it onto +doc+.
  def build_autom(conf_tab, doc)
    mach = StateMachine.new
    build_rec(mach, conf_tab)
    mach.reset(doc)
    mach
  end

  # Adds the rules of the current depth (without their nested 'select') as a
  # level, then recurses into all nested 'select' lists together.
  # A missing 'select' configuration (nil) is treated like an empty one.
  def build_rec(mach, conf_tab)
    return if conf_tab.nil? || conf_tab.size < 1
    exprs = conf_tab.map { |rule| rule.reject { |k, _v| k == 'select' } }
    mach.add_level(exprs)
    build_rec(mach, conf_tab.map { |rule| rule['select'] }.flatten.compact)
  end
end
|
127
|
+
|
128
|
+
# Builds the Section tree while the parser feeds it the document's nodes.
# @levels holds one rule list per nesting depth, @starts the currently open
# section per depth (index 0 is the document), and @content the xpath of the
# last heading seen ('body' before the first one).
class StateMachine

  def initialize
    @levels = []
    @current_level = 0
    @starts = []
    @done = []
    @max_level = 0
    @content = nil
  end

  # Registers the rule list of the next depth; a single rule is wrapped in
  # an array.
  def add_level(tab)
    @levels.push(tab.is_a?(Array) ? tab : [tab])
    @current_level += 1
  end

  # Prepares a run: +doc+ becomes the level-0 container.
  def reset(doc)
    @starts[0] = doc
    @current_level = 0
    @max_level = @levels.size
    @content = 'body'
  end

  def inspect
    "#{@levels.inspect}\n#{@current_level}\n\n#{@done.inspect}"
  end

  # New Section with the given title and Feedbooks level.
  def create_fbsection(title,fblevel)
    section = Section.new
    section.fblevel = fblevel
    section.title = title
    section
  end

  def create_textNode(txt)
    Text.new(txt)
  end

  # Appends what follows the last heading as a final text node, closes every
  # open section from the deepest up, and returns the level-0 container.
  def finish(doc)
    unless @content.nil?
      tail = create_textNode(doc.at(@content).following.to_html)
      @starts[@current_level].content.push(tail)
    end
    @max_level.downto(1) { |lvl| close_section(lvl) }
    @starts[0]
  end

  # Called when +el+ matches a rule of depth +lvl+. Stores the markup found
  # between the previous heading and +el+ in the currently open section,
  # closes sections of depth >= lvl, then opens a new one titled with the
  # element's text.
  def open_section(obj,lvl,el)
    tmp = if @content == 'body'
            el.preceding[0..-1]
          else
            el.root.search(@content...(el.xpath))[1..-1]
          end
    # search cannot find what lies between siblings; fall back to the deep walk
    tmp = el.root.deep_between(@content, (el.xpath)) if tmp.blank?

    unless tmp.blank?
      html = tmp.to_html
      @starts[@current_level].content.push(create_textNode(html)) unless html.blank?
    end

    @max_level.downto(lvl) { |l| close_section(l) }
    @starts[lvl] = create_fbsection(el.root.at(obj[:xpath]).extract_text, obj[:fblevel])
    @content = obj[:xpath]
    @current_level = lvl
  end

  # Attaches the open section of depth +lvl+ to the nearest open shallower
  # section and clears its slot. No-op when nothing is open at that depth.
  def close_section(lvl)
    return if @starts[lvl].nil?
    parent_lvl = lvl - 1
    parent_lvl -= 1 while @starts[parent_lvl].nil?
    @starts[parent_lvl].content.push @starts[lvl]
    @starts[lvl] = nil
  end

  # Tests element +el+ against each depth's rules; the first matching rule of
  # a depth opens a section at that depth. Text nodes are ignored.
  def feed(el)
    return if el.is_a?(Hpricot::Text)
    # The original expression [[]*@levels.size] always evaluates to [[]].
    @done = [[]]

    @levels.each_with_index do |rules, i|
      hit = rules.find { |rule| el.in_search?(rule['expr']) }
      next if hit.nil?
      open_section({:xpath => el.xpath, :fblevel => hit['fblevel']}, i + 1, el)
    end

  end
end
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
class String
  # True when the string contains no character matched by \S.
  def blank?
    (self =~ /\S/).nil?
  end
end
|
237
|
+
|
238
|
+
class NilClass
  # nil always counts as blank.
  def blank?
    true
  end
end
|
243
|
+
|
244
|
+
# Helpers added to Hpricot nodes for matching elements against expressions
# and for collecting the nodes that lie between two points of the document.
module Hpricot::Traverse
  # True when this element is selected by +expr+.
  # An expression made only of [a-z0-9] is compared with the element name,
  # ignoring case. Any other expression is searched from the root when it
  # starts with "/", otherwise from the parent.
  def in_search?(expr)
    return name.downcase == expr.downcase if expr !~ /[^a-z0-9]/

    # expr[0, 1] is the first character. The original compared the
    # two-character slice expr[0..1] with '/', which can only match the
    # expression "/" itself.
    scope = expr[0, 1] == '/' ? root : parent
    scope.search(expr).each do |el|
      return true if el == self
    end
    false
  end

  # Topmost ancestor of this node (memoised in @root).
  def root
    return @root unless @root.nil?
    node = self
    node = node.parent until node.parent.nil?
    @root = node
  end

  # Result of searching the root with the range a..b.
  def between(a,b)
    root.search(a..b)
  end

  # Concatenated content of every text node met while traversing this node.
  def extract_text
    text = ''
    traverse_all_element do |e|
      text += e.content.to_s if e.is_a?(Hpricot::Text)
    end
    text
  end

  # Nodes located after the node found at +i+ and, when +j+ is given and
  # found, before the node found at +j+. Returns an empty Elements when the
  # start node cannot be found; the original failed on nil in that case.
  def deep_between(i,j)
    start = at(i)
    return Hpricot::Elements[] if start.nil?

    stop = j.nil? ? nil : at(j)
    if stop.nil?
      r = start.deep_following
    else
      prec = start.deep_preceding
      # The original wrote `!(prec.include?el || el==tm)`, which Ruby parses
      # as `!prec.include?(el || el == tm)`, i.e. only this membership test.
      # NOTE(review): the start node itself was probably meant to be excluded
      # here too; behaviour kept, confirm.
      r = Hpricot::Elements[*stop.deep_preceding.find_all { |el| !prec.include?(el) }]
    end
    Hpricot::Elements[*select_end(r, i)]
  end

  # From +tab+, keeps what comes after the node designated by +expr+: finds
  # the first entry containing that node, collects the siblings following it
  # inside that entry (recursing when it is not a direct child), then appends
  # the entries of +tab+ located after that entry. When no entry contains the
  # node, the whole of +tab+ is returned.
  def select_end(tab,expr)
    collected = []
    idx = -1
    tab.each_with_index do |e, pos|
      next unless e.search(expr.gsub(e.xpath, '.')).size > 0
      idx = pos
      if e.children.find { |child| child.xpath == expr }
        seen = false
        e.children.each do |child|
          collected << child if seen
          seen = true if child.xpath == expr
        end
      else
        collected = select_end(e.children, expr)
      end
      break
    end
    collected + tab[(idx + 1)..-1]
  end

  # Preceding siblings of this node and of each of its ancestors, ancestors'
  # first; the walk stops below the Hpricot::Doc.
  def deep_preceding()
    ret = Hpricot::Elements[]
    ret += parent.deep_preceding if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    ret += preceding
    Hpricot::Elements[*ret]
  end

  # Following siblings of this node, then those of each of its ancestors; the
  # walk stops below the Hpricot::Doc.
  def deep_following()
    ret = following
    ret += parent.deep_following if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    Hpricot::Elements[*ret]
  end

end
|
332
|
+
|
333
|
+
class Hpricot::Elements
  # An element list is blank when it is empty.
  alias blank? empty?
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: Html2Feedbooks
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benoit Larroque
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-04-28 00:00:00 +02:00
|
13
|
+
default_executable: html2fb.rb
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.1
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: htmlentities
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "4.0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: launchy
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0.3"
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: progressbar
|
47
|
+
type: :runtime
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.0.3
|
54
|
+
version:
|
55
|
+
description: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
|
56
|
+
email: zeta dot ben at gmail dot com
|
57
|
+
executables:
|
58
|
+
- html2fb.rb
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- README
|
65
|
+
- confs/conf.yaml
|
66
|
+
- lib/app.rb
|
67
|
+
- lib/conf.rb
|
68
|
+
- lib/document.rb
|
69
|
+
- lib/downloader.rb
|
70
|
+
- lib/feedbooks.rb
|
71
|
+
- bin/html2fb.rb
|
72
|
+
- lib/parser.rb
|
73
|
+
has_rdoc: true
|
74
|
+
homepage: http://github.com/Html2Feedbooks
|
75
|
+
licenses: []
|
76
|
+
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: "0"
|
87
|
+
version:
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
version:
|
94
|
+
requirements: []
|
95
|
+
|
96
|
+
rubyforge_project:
|
97
|
+
rubygems_version: 1.3.5
|
98
|
+
signing_key:
|
99
|
+
specification_version: 3
|
100
|
+
summary: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
|
101
|
+
test_files: []
|
102
|
+
|