Html2Feedbooks 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ HTML2Feedbooks
2
+ ==============
3
+
4
+ A script to automate basic publishing work on Feedbooks.com.
5
+
6
+ Usage
7
+ -----
8
+
9
+ ./html2fb URL.html
10
+
11
+ You can change some settings by creating your own configuration file and passing it with the -c option:
12
+
13
+ html2fb -c myconf.yaml URL.html
data/bin/html2fb.rb ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/ruby
2
+ require 'optparse'
3
+ require 'open-uri'
4
+ require 'conf.rb'
5
+ require 'downloader.rb'
6
+ require 'document.rb'
7
+ require 'parser.rb'
8
+ require 'feedbooks.rb'
9
+ require 'tmpdir'
10
+ require 'launchy'
11
+
12
+ include HTML2FB
13
+
14
# Command-line entry point: parse the options, obtain a readable URL (asking
# on STDIN until one works), parse the page, then either open a local HTML
# preview or send the result to Feedbooks.
options = {
  :conf    => "conf.yaml",
  :preview => true,
  :conv    => true
}

OptionParser.new do |opts|
  opts.banner = "Usage: html2fb [options] URL"

  opts.on("-c", "--conf FILE", String, "Configuration file") do |file|
    options[:conf] = file
  end
  opts.on("-s", "--send", "Send to feedbooks") do
    options[:preview] = false
  end
  # The value handed to the block is ignored on purpose: OptionParser passes
  # false for a "--no-xxx" switch, so negating it (as the previous code did)
  # left the conversion enabled.
  opts.on("-n", "--no-conv", "No charset conversion") do
    options[:conv] = false
  end
end.parse!

entry = ARGV[0]
url = nil
loop do
  begin
    url = Downloader.valid_url?(entry)
    break
  rescue StandardError => e
    # StandardError only: Interrupt (Ctrl-C) and SystemExit must still be
    # able to stop the program while it waits for a valid URL.
    STDERR.puts 'Invalid URL' unless entry.nil? || entry == ''
    puts e
  end
  print "URL : "
  entry = STDIN.readline.strip
end

conf = Conf.new(options[:conf], options[:conv])
content = Downloader.download(url)
doc = Parser.new(conf).parse(content)
puts doc.toc.to_yaml

if options[:preview]
  # The preview file name is derived from the URL so that re-running on the
  # same page overwrites the previous preview.
  page = File.join(Dir.tmpdir, Digest::MD5.hexdigest(url.to_s)) + '.html'
  # Block form closes the file even if rendering raises.
  File.open(page, 'w') { |f| f.write doc.to_html }
  puts "A preview of the parsed file should be opening in your webbrowser now"
  puts "If nothing open you can open the file located at : #{page}"
  puts "When happy with the parsed output rerun with -s option to send to Feedbooks.com"
  Launchy::Browser.run(page)
else
  doc.to_feedbooks(conf)
end
data/confs/conf.yaml ADDED
@@ -0,0 +1,25 @@
1
+ remove:
2
+ class:
3
+ - totoc
4
+ - pagenum
5
+ - totoi
6
+ - img
7
+ - pg
8
+ expr:
9
+ - 'table'
10
+ - //pre
11
+ - hr
12
+
13
+ select:
14
+ - expr: h2
15
+ fblevel: Part
16
+ select:
17
+ - expr: h3
18
+ fblevel: Chapter
19
+
20
+ fb:
21
+ user: '#ask#'
22
+ bookid: '#ask#'
23
+ booktype: '#ask#'
24
+ pass: '#ask#'
25
+ host: 'feedbooks.com'
data/lib/app.rb ADDED
@@ -0,0 +1,93 @@
1
+ require 'digest/md5'
2
+ require 'open-uri'
3
+ require 'net/http'
4
+ require 'time'
5
+ require 'htmlentities'
6
+ =begin
7
+ def colour(text, colour_code)
8
+ "#{colour_code}#{text}\e[0m"
9
+ end
10
+ def green(text); colour(text, "\e[32m"); end
11
+ def red(text); colour(text, "\e[31m"); end
12
+ def yellow(text); colour(text, "\e[33m"); end
13
+ def blue(text); colour(text, "\e[34m"); end
14
+ =end
15
+
16
# Small Atom client: builds an Atom <entry> from its attributes, POSTs it to
# +addr+ (optionally with HTTP basic auth) and can look up the "down" link of
# the entry that was created.
#
# Relies on Hpricot, HTMLEntities and a String#blank?/NilClass#blank?
# extension being loaded by the surrounding program.
class AtomPost
  attr_accessor :title, :content, :date, :author
  attr_accessor :addr, :user, :pass, :type

  # Shared entity encoder/decoder.
  HTMLENCODER = HTMLEntities.new

  # addrs - URL the entry will be posted to (may also be set later via addr=).
  def initialize(addrs = nil)
    self.addr = addrs unless addrs.nil?
  end

  # Fetches +entry_url+ and returns the path of the entry's link with
  # rel="down", or nil when the response has no <entry> or no such link.
  def down_url(entry_url)
    url = URI.parse(entry_url)
    Net::HTTP.start(url.host, url.port) do |http|
      req = Net::HTTP::Get.new(url.path)
      req.basic_auth user, pass unless user.nil?
      response = http.request(req)
      entry = Hpricot(response.body).at('//entry')
      # Guard both lookups: a response without <entry> used to raise
      # NoMethodError on nil.
      link = entry.nil? ? nil : entry.at('link[@rel="down"]')
      link.nil? ? nil : URI.parse(link[:href]).path
    end
  end

  # POSTs the entry to +addr+.
  #
  # Returns the value of the response's Location header (nil when absent) on
  # success or redirection; any other response is raised via Net::HTTP's
  # error!.
  # Raises StandardError when no address is set.
  #
  # NOTE: this shadows Object#send for AtomPost instances; the name is kept
  # because callers use it.
  def send
    raise StandardError.new('Missing Address') if addr.nil?
    url = URI.parse(addr)
    req = Net::HTTP::Post.new(url.path)
    req.basic_auth user, pass unless user.nil?

    xml = '<?xml version="1.0"?>' + "\n"
    xml << '<entry xmlns="http://www.w3.org/2005/Atom">' + "\n"
    xml << '<title>' + decode_text(title) + '</title>' + "\n"
    xml << '<id>' + Digest::MD5.hexdigest(title + content) + '</id>' + "\n"
    xml << '<updated>' + date.xmlschema + '</updated>' + "\n"
    xml << '<author><name>' + author + '</name></author>' + "\n"
    xml << '<content>' + recode_text(content) + '</content>' + "\n"
    xml << '<category label="' + type + '" term="' + type + '" />' + "\n" unless type.nil?
    xml << '</entry>' + "\n"
    req.body = xml

    req.set_content_type('application/atom+xml;type=entry')

    res = Net::HTTP.new(url.host, url.port).start { |http| http.request(req) }
    case res
    when Net::HTTPSuccess, Net::HTTPRedirection
      res['location']
    else
      res.error!
    end
  end

  # Rewrites every text node that contains a named entity so that its
  # entities are expressed in decimal form (named HTML entities are not
  # defined in plain XML). Blank input is returned untouched.
  def recode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text do |t|
      t.content = force_decimal_entities(t.content) if t.content.match(/&[a-z][a-z0-9]+;/i)
    end
    m.to_html
  end

  # Decodes all entities in +txt+ and re-encodes the result using decimal
  # entities.
  def force_decimal_entities(txt)
    HTMLENCODER.encode(HTMLENCODER.decode(txt), :decimal)
  end

  # Replaces HTML entities in the text nodes of +txt+ by the characters they
  # stand for. Blank input is returned untouched.
  #
  # The previous implementation computed the decoded text but never stored
  # it, so entities were passed through unchanged. The decoded text is
  # re-encoded with HTMLEntities' basic encoding so that characters such as
  # "&" and "<" stay escaped and the surrounding XML remains well-formed.
  def decode_text(txt)
    return txt if txt.blank?
    m = Hpricot(txt)
    m.traverse_text do |t|
      t.content = HTMLENCODER.encode(HTMLENCODER.decode(t.content))
    end
    m.to_html
  end

end
data/lib/conf.rb ADDED
@@ -0,0 +1,25 @@
1
+ require 'yaml'
2
+
3
module HTML2FB
  # Read-only view over the YAML configuration file.
  class Conf
    # Loads the first readable file named +file+ found, in order, in: the
    # path as given, the current directory, this file's directory and the
    # sibling "confs" directory.
    #
    # file - configuration file name or path.
    # conv - value stored under the 'conv' key of the loaded configuration.
    #
    # A candidate that cannot be parsed is reported on STDERR and the next
    # location is tried. When nothing can be loaded a warning is printed and
    # the configuration only contains the 'conv' key.
    def initialize(file, conv)
      here = File.dirname(__FILE__)
      ['', './', "#{here}/", "#{here}/../confs/"].each do |prefix|
        path = prefix + file
        begin
          next unless File.readable?(path) && File.exist?(path)
          loaded = File.open(path, 'r') { |io| YAML.load(io) }
          puts "loaded config file : " + path
          loaded['conv'] = conv
          @conf = loaded
          return
        rescue StandardError => e
          # Interpolate the exception: String + Exception raises TypeError,
          # which used to hide the real problem.
          STDERR.puts("unreadable conf : #{path}\n#{e}")
        end
      end
      STDERR.puts("no usable configuration file found for : #{file}")
      @conf = { 'conv' => conv }
    end

    # Returns the configuration value stored under key +x+ (nil when absent).
    def [](x)
      @conf[x]
    end
  end
end
data/lib/document.rb ADDED
@@ -0,0 +1,71 @@
1
module HTML2FB

  # A titled node of the document tree. Its content is an ordered list of
  # children, each responding to to_html, to_s and titles.
  class Section
    attr_accessor :title, :content, :fblevel

    def initialize
      @content = []
    end

    # Concatenated HTML of every child, in order.
    def to_html
      content.map { |child| child.to_html }.join
    end

    # The title, prefixed with "[fblevel] " when a level is set.
    def decorated_title
      return title if fblevel.nil?
      "[#{fblevel}] " + title
    end

    # Returns [decorated_title, [titles of each child]].
    def titles
      child_titles = content.map { |child| child.titles }
      [decorated_title, child_titles]
    end

    # Debug representation: the title line followed by the children's
    # representations separated by blank lines.
    def to_s
      body = content.map { |child| child.to_s }.join("\n\n")
      "title :#{title} \n" + body
    end
  end

  # Root of the tree.
  class Document < Section
    # Table of contents: the titles structure of each top-level child.
    def toc
      content.map { |child| child.titles }
    end
  end

  # Leaf node wrapping a chunk of markup.
  class Text
    attr_accessor :content

    def initialize(c = '')
      @content = c
    end

    # The wrapped markup, unchanged.
    def to_html
      content
    end

    # Same as to_html.
    def to_s
      content
    end

    # Placeholder entry used in titles structures.
    def titles
      ['#text']
    end
  end
end
data/lib/downloader.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'open-uri'
2
+ require 'tempfile'
3
+
4
module HTML2FB
  # Fetches the source document, either from a URL handled by open-uri or
  # from a local file path.
  class Downloader
    # Parses +entry+ and checks that it can actually be opened.
    #
    # Returns the parsed URI. Raises whatever URI.parse or the open call
    # raises when the entry is malformed or cannot be read.
    def self.valid_url?(entry)
      uri = URI.parse(entry)
      open_source(uri) { |_io| nil }
      uri
    end

    # Prints a progress line and returns the full content behind +uri+.
    def self.download(uri)
      print "Downloading "
      puts uri.to_s
      open_source(uri) { |io| io.read }
    end

    # Opens +source+ and yields the IO, closing it when the block returns.
    #
    # URIs that open-uri can open (they respond to #open) are fetched through
    # it; anything else is treated as a local path. Kernel#open is avoided:
    # it leaked the handle here and no longer opens URLs on Ruby 3.
    def self.open_source(source, &block)
      uri = source.is_a?(URI::Generic) ? source : URI.parse(source.to_s)
      if uri.respond_to?(:open)
        uri.open(&block)
      else
        File.open(uri.to_s, 'r', &block)
      end
    end
    private_class_method :open_source
  end
end
data/lib/feedbooks.rb ADDED
@@ -0,0 +1,128 @@
1
+ require 'app.rb'
2
+ require 'hpricot'
3
+ require 'digest/md5'
4
+
5
module HTML2FB

  # Credentials and target book for one run. A single session exists per
  # process and is reachable through FBSession.session.
  class FBSession

    attr_accessor :bookid, :booktype, :user, :host
    attr_reader :pass

    @@fbsession = nil

    # Reads the 'fb' section of +conf+, prompting on STDIN (via ask) for any
    # value that is missing or set to '#ask#'.
    #
    # Raises StandardError when a session already exists. The previous code
    # built that exception but never raised it.
    def initialize(conf)
      raise StandardError.new('Already in session') unless @@fbsession.nil?
      fb = conf['fb'] || {}
      self.bookid = ask(fb['bookid'], "Book Id")
      self.booktype = ask(fb['booktype'], "Book Type")
      self.user = ask(fb['user'], "User")
      self.pass = ask(fb['pass'], "Pass")
      self.host = fb['host']
      self.host = 'feedbooks.com' if @host.nil?
      # Registered last so a failed prompt does not leave a half-built session.
      @@fbsession = self
    end

    # The session created for this process, or nil.
    def self.session
      @@fbsession
    end

    # Stores the password as an MD5 hex digest. A value that already has 32
    # lowercase alphanumeric characters is taken to be a digest and stored
    # unchanged.
    def pass=(pas)
      if pas.gsub(/[^a-z0-9]/, '').size == 32
        @pass = pas
      else
        @pass = Digest::MD5.hexdigest(pas)
      end
    end
  end


  class Document
    # Opens the session, then publishes every top-level child at the root of
    # the book.
    def to_feedbooks(conf)
      FBSession.new(conf)
      content.each do |e|
        e.to_feedbooks(conf, nil)
      end
    end
  end

  class FBPost
    # Posts one entry through AtomPost.
    #
    # conf - configuration (unused here, kept for the callers' signature).
    # tit  - entry title.
    # cont - entry content.
    # type - Feedbooks entry type ("Part", "Chapter", "Section", "Text").
    # path - path to post under; nil posts to the book's contents.atom.
    #
    # Returns what AtomPost#down_url gives for the created entry, or nil when
    # the post returned no location.
    def self.push(conf, tit, cont, type, path = nil)
      puts "Sending to feedbooks #{tit} with type #{type}"
      fb = FBSession.session
      if path.nil?
        post = AtomPost.new "http://#{fb.host}/#{fb.booktype}/#{fb.bookid}/contents.atom"
      else
        post = AtomPost.new "http://#{fb.host}#{path}"
      end

      post.content = cont
      post.user = fb.user
      post.pass = fb.pass
      post.date = Time.now
      post.author = fb.user
      post.title = tit
      post.type = type
      s = post.send
      post.down_url(s) unless s.nil?
    end
  end

  class Section
    # Current nesting depth while publishing or rendering.
    @@level = 0
    @@types = ['Part', 'Chapter', 'Section']

    # Publishes this section, then its children underneath it.
    #
    # The entry type is the section's fblevel when it names a known type,
    # otherwise the type matching the current depth (the last known type
    # when deeper than the list).
    def to_feedbooks(conf, path = nil)
      type = self.fblevel.to_s.downcase.strip.capitalize
      unless @@types.include?(type)
        type = @@types[@@level] || @@types[-1]
      end
      fbpath = FBPost.push(conf, title, '', type, path)
      @@level += 1
      begin
        content.each do |e|
          e.to_feedbooks(conf, fbpath)
        end
      ensure
        # Restore the depth even when a child fails to post.
        @@level -= 1
      end
    end

    alias :old_to_html :to_html

    # Preview rendering: a heading sized by the nesting depth followed by
    # the children's HTML.
    def to_html
      # title.to_s: a document without <title> has a nil title, which used
      # to raise TypeError here.
      ret = "<h#{@@level + 1}>" + title.to_s + "</h#{@@level + 1}>"
      @@level += 1
      begin
        ret += old_to_html
      ensure
        @@level -= 1
      end
      ret
    end
  end

  class Text
    # Publishes this chunk as a "Text" entry with an empty title. Nothing is
    # sent when the markup is blank. Element names are prefixed with
    # "xhtml:" and the whole chunk is wrapped in a div declaring that prefix.
    def to_feedbooks(conf, path = nil)
      stxt = to_html
      return unless stxt.strip.size > 0
      doc = Hpricot('<div xmlns:xhtml="http://www.w3.org/1999/xhtml">' + stxt + '</div>')
      doc.traverse_all_element do |e|
        unless e.is_a?(Hpricot::Text)
          e.name = 'xhtml:' + e.name
          e.etag = 'xhtml:' + e.etag unless (!e.respond_to?(:etag)) || e.etag.nil?
        end
      end
      FBPost.push(conf, '', doc.to_html, "Text", path)
    end
  end
end
119
+
120
# Returns +txt+ as is unless it is nil or the '#ask#' placeholder; in that
# case the user is prompted on STDIN (showing +disp+) until a non-empty,
# stripped answer is entered, and that answer is returned.
def ask(txt, disp = 'Prompt')
  needs_prompt = txt.nil? || txt == '#ask#'
  return txt unless needs_prompt

  loop do
    print disp + ' : '
    answer = STDIN.readline.strip
    return answer unless answer.size == 0
  end
end
data/lib/parser.rb ADDED
@@ -0,0 +1,335 @@
1
+ require 'hpricot'
2
+ require 'document.rb'
3
+ require 'progressbar'
4
+ #require 'ruby-prof'
5
+ #require 'term/ansicolor'
6
+ #include Term::ANSIColor
7
+
8
module HTML2FB
  # Turns an HTML page into a Document tree, following the 'remove' and
  # 'select' sections of the configuration.
  class Parser

    def initialize(conf)
      @conf = conf
    end

    # Parses +txt+ (HTML source) and returns the resulting Document.
    #
    # When the 'conv' setting is on and the page declares a charset in a
    # Content-Type meta tag, the source is first converted to UTF-8.
    def parse(txt)
      puts "Parsing HTML"
      pdoc = Hpricot(txt)
      if @conf['conv']
        tc = declared_charset(pdoc)
        unless tc.nil?
          puts "Trying to convert source encoding from #{tc} to utf-8"
          pdoc = Hpricot(to_utf8(txt, tc))
        end
      end
      doc = Document.new
      puts "Removing garbage elements"
      remove_objs(pdoc)
      ti = pdoc.at('title')
      doc.title = ti.extract_text.strip unless ti.nil?

      puts "Building TOC"
      parse_text(pdoc, doc)

      doc
    end

    protected

    # Returns the charset named in the page's Content-Type meta tag, or nil
    # when there is no such tag or it carries no charset part.
    def declared_charset(pdoc)
      mc = pdoc / 'meta[@http-equiv="Content-Type"]'
      return nil unless mc.size > 0
      # to_s: a meta tag without a content attribute used to raise on nil.
      charset = mc.first.attributes['content'].to_s.split(';').find do |s|
        s.strip[0, 7] == 'charset'
      end
      charset.nil? ? nil : charset.split('=').last.strip
    end

    # Converts +txt+ from +charset+ to UTF-8. String#encode is used when
    # available; iconv (no longer part of the standard library) is only
    # loaded as a fallback for interpreters without it.
    def to_utf8(txt, charset)
      if txt.respond_to?(:encode)
        txt.encode('utf-8', charset.downcase)
      else
        require 'iconv'
        Iconv.conv('utf-8', charset.downcase, txt)
      end
    end

    # Deletes from +doc+ everything listed in the 'remove' configuration:
    # elements by class, elements by expression, everything before/after a
    # given element (the element included) and everything between two
    # expressions.
    def remove_objs(doc)
      rules = @conf['remove']
      return unless rules

      (rules['class'] || []).each do |cl|
        doc.search('.' + cl).remove
      end
      (rules['expr'] || []).each do |cl|
        doc.search(cl).remove
      end
      (rules['before'] || []).each do |cl|
        x = doc.at(cl)
        if x
          x.preceding.remove
          x.parent.children.delete(x)
        end
      end
      (rules['between'] || []).each do |cl|
        t = doc.between(cl.first, cl.last)
        t.remove unless t.nil?
      end
      (rules['after'] || []).each do |cl|
        x = doc.at(cl)
        if x
          x.following.remove
          x.parent.children.delete(x)
        end
      end
    end

    # Feeds every node of +doc+ to the state machine built from the 'select'
    # configuration, filling +ret+ (the Document) with sections and text.
    def parse_text(doc, ret)
      aut = build_autom(@conf['select'], ret)

      pbar = ProgressBar.new("Parsing", doc.search('//').size)
      doc.traverse_all_element do |el|
        aut.feed(el)
        pbar.inc
      end
      pbar.finish
      aut.finish(doc)
    end

    # Builds a StateMachine with one level per nesting depth of +conf_tab+
    # and resets it onto +doc+.
    def build_autom(conf_tab, doc)
      mach = StateMachine.new
      # A configuration without 'select' yields a machine with no level.
      build_rec(mach, conf_tab || [])
      mach.reset(doc)
      mach
    end

    # Adds the selectors of the current depth as one level, then recurses on
    # the nested 'select' entries.
    def build_rec(mach, conf_tab)
      return if conf_tab.size < 1
      exprs = conf_tab.collect { |e| e.reject { |k, v| k == 'select' } }
      mach.add_level(exprs)
      build_rec(mach, conf_tab.collect { |e| e['select'] }.flatten.reject { |a| a.nil? })
    end
  end

  # Tracks, while the page is traversed, which section is open at each level
  # and where the not-yet-collected content starts.
  class StateMachine

    def initialize
      @levels = []        # selectors, one array per level
      @current_level = 0
      @starts = []        # open node per level; index 0 holds the document
      @done = []
      @max_level = 0
      @content = nil      # expression of the element after which content is pending
    end

    # Appends one level of selectors (a single selector is wrapped).
    def add_level(tab)
      tab = [tab] unless tab.is_a?(Array)
      @levels.push tab
      @current_level += 1
    end

    # Prepares the machine to fill +doc+.
    def reset(doc)
      @current_level = 0
      @max_level = @levels.size
      @starts[0] = doc
      @content = 'body'
    end

    def inspect
      @levels.inspect + "\n" + @current_level.to_s + "\n\n" + @done.inspect
    end

    def create_fbsection(title, fblevel)
      s = Section.new
      s.fblevel = fblevel
      s.title = title
      s
    end

    def create_textNode(txt)
      Text.new(txt)
    end

    # Collects what follows the last section start as text, closes every
    # open section and returns the root node.
    def finish(doc)
      unless @content.nil?
        t = create_textNode(doc.at(@content).following.to_html)
        @starts[@current_level].content.push(t)
      end
      @max_level.downto(1) do |l|
        close_section(l)
      end
      @starts[0]
    end

    # Starts a section at level +lvl+ for element +el+: the markup pending
    # since the previous start is stored as text in the current node, the
    # sections open at +lvl+ and deeper are closed, and a new section titled
    # with the element's text becomes the open one.
    def open_section(obj, lvl, el)
      if @content == 'body'
        tmp = el.preceding[0..-1]
      else
        tmp = el.root.search(@content...(el.xpath))[1..-1]
      end
      if tmp.blank? # search cannot find content between siblings
        tmp = el.root.deep_between(@content, (el.xpath))
      end
      unless tmp.blank?
        tmph = tmp.to_html
        unless tmph.blank?
          t = create_textNode(tmph)
          @starts[@current_level].content.push(t)
        end
      end
      @max_level.downto(lvl) do |l|
        close_section(l)
      end
      @starts[lvl] = create_fbsection(el.root.at(obj[:xpath]).extract_text, obj[:fblevel])
      @content = obj[:xpath]
      @current_level = lvl
    end

    # Attaches the section open at +lvl+ (if any) to the nearest open node
    # above it.
    def close_section(lvl)
      return if @starts[lvl].nil?
      llvl = lvl - 1
      llvl = llvl - 1 until !@starts[llvl].nil?
      @starts[llvl].content.push @starts[lvl]
      @starts[lvl] = nil
    end

    # Examines one traversed node; text nodes are ignored. For every level
    # that has a selector matching the element, a section is opened at that
    # level (only the first matching selector of a level is used).
    def feed(el)
      return if el.is_a?(Hpricot::Text)
      # Same value the original expression `[[]*@levels.size]` evaluated to;
      # it is only read by #inspect.
      @done = [[]]

      @levels.each_with_index do |lvl, i|
        lvl.each do |expr|
          if el.in_search?(expr['expr'])
            open_section({ :xpath => el.xpath, :fblevel => expr['fblevel'] }, i + 1, el)
            break
          end
        end
      end
    end
  end
end
230
+
231
+
232
class String
  # True when the string contains no non-whitespace character (this
  # includes the empty string).
  def blank?
    (self =~ /\S/).nil?
  end
end
237
+
238
class NilClass
  # nil always counts as blank.
  def blank?
    !self
  end
end
243
+
244
# Helpers added to Hpricot nodes for the parser.
module Hpricot::Traverse
  # True when this element is matched by +expr+.
  #
  # A purely alphanumeric lowercase expression is compared with the element
  # name. Any other expression is searched from the parent, or from the
  # document root when it starts with "/".
  def in_search?(expr)
    if expr !~ /[^a-z0-9]/
      return self.name.downcase() == expr.downcase()
    end

    se_in = self.parent
    # expr[0, 1] is the first character. The previous `expr[0..1]` took two
    # characters and so never equalled "/", which meant absolute
    # expressions were never searched from the root.
    if expr[0, 1] == '/'
      se_in = self.root
    end
    se_in.search(expr).each do |el|
      return true if el == self
    end
    false
  end

  # Topmost ancestor of this node (memoized on the node).
  def root
    return @root unless @root.nil?
    se_in = self
    se_in = se_in.parent until se_in.parent.nil?
    @root = se_in
    se_in
  end

  # Result of searching the root with the range +a+..+b+.
  def between(a, b)
    root.search(a..b)
  end

  # Concatenated content of every text node under this node.
  def extract_text
    t = ''
    self.traverse_all_element do |e|
      t += e.content.to_s if e.is_a?(Hpricot::Text)
    end
    t
  end

  # Nodes located after the element matching +i+ and, when +j+ is given and
  # found, before the element matching +j+, across nesting levels.
  # Returns an empty list when +i+ cannot be found.
  def deep_between(i, j)
    unless j.nil? || self.at(j).nil?
      tm = self.at(i)
      prec = tm.deep_preceding
      # NOTE(review): the original condition `!(prec.include?el || el==tm)`
      # parses as `!prec.include?(el || el==tm)`, i.e. `!prec.include?(el)`,
      # so +tm+ itself is not filtered out here. That behaviour is kept
      # because select_end below looks for +i+ in the list; confirm before
      # changing it.
      r = Hpricot::Elements[*self.at(j).deep_preceding.find_all { |el| !prec.include?(el) }]
    else
      r = self.at(i).deep_following unless self.at(i).nil?
    end
    # Nothing to select from: previously this raised NoMethodError on nil.
    return Hpricot::Elements[] if r.nil?
    Hpricot::Elements[*select_end(r, i)]
  end

  # Returns the part of +tab+ located after the element matching +expr+.
  #
  # The first entry of +tab+ in which +expr+ (made relative to that entry)
  # finds something is located; from inside it, the children following the
  # match are kept (recursing when the match is deeper), and every later
  # entry of +tab+ is appended. When no entry matches, all of +tab+ is
  # returned.
  def select_end(tab, expr)
    s = []
    f = false
    idx = -1
    i = 0
    tab.each do |e|
      if e.search(expr.gsub(e.xpath, '.')).size > 0
        idx = i
        if e.children.find { |ee| ee.xpath == expr }
          e.children.each do |ee|
            s << ee if f
            f = true if ee.xpath == expr
          end
        else
          s = select_end(e.children, expr)
        end
        break
      else
        i += 1
      end
    end
    s + tab[(idx + 1)..-1]
  end

  # Preceding siblings of this node and of each of its ancestors, outermost
  # ancestor first.
  def deep_preceding()
    ret = Hpricot::Elements[]
    ret += parent.deep_preceding if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    ret += preceding
    Hpricot::Elements[*ret]
  end

  # Following siblings of this node and of each of its ancestors, innermost
  # first.
  def deep_following()
    ret = following
    ret += parent.deep_following if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc)
    Hpricot::Elements[*ret]
  end

end
332
+
333
class Hpricot::Elements
  # An element list is blank when it holds no element.
  def blank?
    empty?
  end
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Html2Feedbooks
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.7
5
+ platform: ruby
6
+ authors:
7
+ - Benoit Larroque
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-04-28 00:00:00 +02:00
13
+ default_executable: html2fb.rb
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "4.0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: launchy
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0.3"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: progressbar
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.0.3
54
+ version:
55
+ description: Html2Feedbooks is script to automate basic publishing on feedbooks.com
56
+ email: zeta dot ben at gmail dot com
57
+ executables:
58
+ - html2fb.rb
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - README
65
+ - confs/conf.yaml
66
+ - lib/app.rb
67
+ - lib/conf.rb
68
+ - lib/document.rb
69
+ - lib/downloader.rb
70
+ - lib/feedbooks.rb
71
+ - bin/html2fb.rb
72
+ - lib/parser.rb
73
+ has_rdoc: true
74
+ homepage: http://github.com/Html2Feedbooks
75
+ licenses: []
76
+
77
+ post_install_message:
78
+ rdoc_options: []
79
+
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: "0"
93
+ version:
94
+ requirements: []
95
+
96
+ rubyforge_project:
97
+ rubygems_version: 1.3.5
98
+ signing_key:
99
+ specification_version: 3
100
+ summary: Html2Feedbooks is script to automate basic publishing on feedbooks.com
101
+ test_files: []
102
+