Html2Feedbooks 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ HTML2Feedbooks
2
+ ==============
3
+
4
+ A script to automate basic publishing work on Feedbooks.com.
5
+
6
+ Usage
7
+ -----
8
+
9
+ ./html2fb URL.html
10
+
11
+ You can change some settings by creating your own configuration file and passing it with the -c option:
12
+
13
+ html2fb -c myconf.yaml URL.html
data/bin/html2fb.rb ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/ruby
2
+ require 'optparse'
3
+ require 'open-uri'
4
+ require 'conf.rb'
5
+ require 'downloader.rb'
6
+ require 'document.rb'
7
+ require 'parser.rb'
8
+ require 'feedbooks.rb'
9
+ require 'tmpdir'
10
+ require 'launchy'
11
+
12
+ include HTML2FB
13
+
14
# Command-line flow: read the options, obtain a reachable URL (prompting on
# STDIN until one works), parse the page, then either write a local preview
# and open it in a browser or push the result to Feedbooks.
options = {}
options[:conf] = "conf.yaml"
options[:preview] = true
options[:conv] = true
OptionParser.new do |opts|
  opts.banner = "Usage: html2fb [options] URL"

  opts.on("-c", "--conf FILE", String,"Configuration file") do |f|
    options[:conf] = f
  end
  opts.on("-s", "-s","Send to feedbooks") do |f|
    options[:preview] = !f
  end
  # NOTE(review): OptionParser short switches are a single character, so the
  # "-nc" spelling is presumably registered as -n with an attached argument,
  # and "--no-" is a prefix OptionParser treats specially. Left unchanged on
  # purpose; verify how both spellings are parsed before touching them.
  opts.on("-nc", "--no-conv","No charset conversion") do |f|
    options[:conv] = !f
  end
end.parse!

entry = ARGV[0]
url = nil
loop do
  unless entry.nil? || entry == ''
    begin
      # valid_url? raises when the address cannot be parsed or opened.
      url = Downloader.valid_url?(entry)
      break
    rescue StandardError => e
      # StandardError only: Ctrl-C and exit requests must still terminate.
      STDERR.puts 'Invalid URL'
      puts e
    end
  end
  print "URL : "
  STDOUT.flush
  begin
    entry = STDIN.readline.strip
  rescue EOFError
    # No more input to read: stop instead of dying with a backtrace.
    abort 'No URL given'
  end
end

conf = Conf.new(options[:conf], options[:conv])
content = Downloader.download(url)
doc = Parser.new(conf).parse(content)
puts doc.toc.to_yaml
if options[:preview]
  # The preview file name is derived from the URL so reruns overwrite it.
  page = File.join(Dir.tmpdir(), Digest::MD5.hexdigest(url.to_s)) + '.html'
  # Block form closes the file even when rendering raises.
  File.open(page, 'w') { |f| f.write doc.to_html }
  puts "A preview of the parsed file should be opening in your webbrowser now"
  puts "If nothing open you can open the file located at : #{page}"
  puts "When happy with the parsed output rerun with -s option to send to Feedbooks.com"
  Launchy::Browser.run(page)
else
  doc.to_feedbooks(conf)
end
data/confs/conf.yaml ADDED
@@ -0,0 +1,25 @@
1
+ remove:
2
+ class:
3
+ - totoc
4
+ - pagenum
5
+ - totoi
6
+ - img
7
+ - pg
8
+ expr:
9
+ - 'table'
10
+ - //pre
11
+ - hr
12
+
13
+ select:
14
+ - expr: h2
15
+ fblevel: Part
16
+ select:
17
+ - expr: h3
18
+ fblevel: Chapter
19
+
20
+ fb:
21
+ user: #ask#
22
+ bookid: #ask#
23
+ booktype: #ask#
24
+ pass: #ask#
25
+ host: 'feedbooks.com'
data/lib/app.rb ADDED
@@ -0,0 +1,93 @@
1
+ require 'digest/md5'
2
+ require 'open-uri'
3
+ require 'net/http'
4
+ require 'time'
5
+ require 'htmlentities'
6
+ =begin
7
+ def colour(text, colour_code)
8
+ "#{colour_code}#{text}\e[0m"
9
+ end
10
+ def green(text); colour(text, "\e[32m"); end
11
+ def red(text); colour(text, "\e[31m"); end
12
+ def yellow(text); colour(text, "\e[33m"); end
13
+ def blue(text); colour(text, "\e[34m"); end
14
+ =end
15
+
16
# Small Atom client: builds one <entry> document from its attributes and
# POSTs it to +addr+, optionally with HTTP basic authentication.
#
# NOTE: #send shadows Object#send for instances of this class; use __send__
# for dynamic dispatch on an AtomPost.
class AtomPost
  attr_accessor :title, :content, :date, :author, :addr, :user, :pass, :type

  # addrs:: URL the entry will be posted to (may also be set later via addr=).
  def initialize(addrs=nil)
    self.addr=addrs unless addrs.nil?
  end

  # Fetches +entry_url+ and returns the path of the href carried by the
  # entry's link[@rel="down"] element, or nil when the response has no such
  # entry or link.
  def down_url(entry_url)
    url=URI.parse(entry_url)
    Net::HTTP.start(url.host,url.port) do |http|
      req = Net::HTTP::Get.new(request_target(url))
      req.basic_auth user,pass unless user.nil?
      doc=Hpricot(http.request(req).body)
      entry=doc.at('//entry')
      link=entry.nil? ? nil : entry.at('link[@rel="down"]')
      link.nil? ? nil : URI.parse(link[:href]).path
    end
  end

  # Posts the entry to +addr+.
  # Returns the Location header of a success/redirect response (nil when the
  # header is absent). Raises StandardError when +addr+ is missing and lets
  # res.error! raise for any other response class.
  def send
    raise StandardError.new('Missing Address') if addr.nil?
    url = URI.parse(addr)
    req = Net::HTTP::Post.new(request_target(url))
    req.basic_auth user,pass unless user.nil?

    body = '<?xml version="1.0"?>'+"\n"
    body << '<entry xmlns="http://www.w3.org/2005/Atom">'+"\n"
    body << '<title>'+decode_text(title)+'</title>'+"\n"
    body << '<id>'+Digest::MD5.hexdigest(title+content)+'</id>'+"\n"
    body << '<updated>'+date.xmlschema+'</updated>'+"\n"
    body << '<author><name>'+author+'</name></author>'+"\n"
    body << '<content>'+recode_text(content)+'</content>'+"\n"
    body << '<category label="'+type+'" term="'+type+'" />'+"\n" unless type.nil?
    body << '</entry>'+"\n"
    req.body = body

    req.set_content_type('application/atom+xml;type=entry')

    res = Net::HTTP.new(url.host, url.port).start {|http| http.request(req) }
    case res
    when Net::HTTPSuccess, Net::HTTPRedirection
      res['location']
    else
      res.error!
    end
  end

  # Re-encodes every text node that contains a named entity so that it uses
  # decimal entities instead. Blank input is returned untouched.
  def recode_text(txt)
    return txt if txt.blank?
    m=Hpricot(txt)
    m.traverse_text{|t| t.content=force_decimal_entities(t.content) if t.content.match(/&[a-z][a-z0-9]+;/i)}
    m.to_html
  end

  HTMLENCODER=HTMLEntities.new

  # Decodes all entities in +txt+ and encodes the result back as decimal ones.
  def force_decimal_entities(txt)
    HTMLENCODER.encode(HTMLENCODER.decode(txt),:decimal)
  end

  # Runs +txt+ through Hpricot and returns the serialized markup.
  # NOTE(review): the decoded value computed in the block is discarded, so
  # entities are not actually decoded here. Assigning it back to t.content
  # could emit raw '&' into the XML title, so the behaviour is left as-is
  # until the intended output is confirmed.
  def decode_text(txt)
    return txt if txt.blank?
    m=Hpricot(txt)
    m.traverse_text{|t| HTMLENCODER.decode(t.content)}
    m.to_html
  end

  private

  # Request target for +url+: path plus query string when the URI class
  # provides request_uri, never empty (Net::HTTP needs at least "/").
  def request_target(url)
    target = url.respond_to?(:request_uri) ? url.request_uri : url.path
    target.to_s.empty? ? '/' : target
  end
end
data/lib/conf.rb ADDED
@@ -0,0 +1,25 @@
1
+ require 'yaml'
2
+
3
module HTML2FB
  # Read-only view over the YAML configuration file.
  #
  # The file name is tried as given, then relative to the current directory,
  # to this source file's directory and to its sibling "confs" directory; the
  # first readable file whose content is a mapping wins.
  class Conf
    # file:: configuration file name or path
    # conv:: value stored under the 'conv' key of the loaded settings
    def initialize(file,conv)
      here = File.dirname(__FILE__)
      ['', './', "#{here}/", "#{here}/../confs/"].each do |prefix|
        path = prefix + file
        # File.file? instead of the removed File.exists?; also skips directories.
        next unless File.file?(path) && File.readable?(path)
        begin
          loaded = File.open(path, 'r') { |io| YAML::load(io) }
          unless loaded.is_a?(Hash)
            STDERR.puts('unreadable conf : ' + path + "\n" + 'not a mapping')
            next
          end
          @conf = loaded
          puts "loaded config file : " + path
          @conf['conv'] = conv
          return
        rescue StandardError => e
          # e.message: an Exception cannot be concatenated to a String.
          STDERR.puts('unreadable conf : ' + path + "\n" + e.message)
        end
      end
      # Nothing usable was found: keep lookups working instead of leaving nil.
      STDERR.puts('no readable conf found : ' + file.to_s)
      @conf = { 'conv' => conv }
    end

    # Returns the setting stored under key +x+ (nil when absent).
    def [](x)
      @conf[x]
    end
  end
end
data/lib/document.rb ADDED
@@ -0,0 +1,71 @@
1
module HTML2FB

  # A titled node of the parsed document; +content+ holds child Sections and
  # Text leaves in document order.
  class Section
    attr_accessor :title
    attr_accessor :content
    attr_accessor :fblevel

    def initialize
      @content=[]
    end

    # Concatenated HTML of every child.
    def to_html
      content.collect { |child| child.to_html }.join
    end

    # Title prefixed with "[fblevel] " when a level is set, the bare title
    # otherwise. Interpolation keeps a nil title from raising.
    def decorated_title
      return title if fblevel.nil?
      "[#{fblevel}] #{title}"
    end

    # Nested outline: [decorated title, [outline of each child]].
    def titles
      [decorated_title, content.collect { |child| child.titles }]
    end

    def to_s
      "title :#{title} \n" + content.collect { |child| child.to_s }.join("\n\n")
    end
  end

  # Root of the tree.
  class Document < Section
    # Outline of the top-level children (the document's own title is left out).
    def toc
      content.collect { |child| child.titles }
    end
  end

  # Leaf holding a chunk of markup as a string.
  class Text
    attr_accessor :content

    def initialize(c='')
      @content=c
    end

    def to_html
      @content
    end

    def to_s
      @content
    end

    # Placeholder entry used in outlines.
    def titles
      ['#text']
    end
  end
end
data/lib/downloader.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'open-uri'
2
+ require 'tempfile'
3
+
4
module HTML2FB
  # Fetches the source document from a URL or a local path.
  class Downloader
    # Parses +entry+ and opens it once to prove it is reachable.
    # Returns the parsed URI; raises when parsing or opening fails.
    def self.valid_url?(entry)
      uri=URI.parse(entry)
      # Block form so the probe handle is closed right away.
      open_stream(uri) { |io| io }
      uri
    end

    # Prints a progress line and returns the whole content behind +uri+
    # (a URI object or a string).
    def self.download(uri)
      print "Downloading "
      puts uri.to_s
      open_stream(uri) { |io| io.read }
    end

    # Opens +uri+ and yields the stream. URIs that open-uri can read are
    # opened through their own #open; anything else is treated as a local
    # file path. Kernel.open is avoided: it runs "|command" strings and no
    # longer opens URLs on current Ruby versions.
    def self.open_stream(uri, &block)
      target = uri.is_a?(URI::Generic) ? uri : URI.parse(uri.to_s)
      if target.respond_to?(:open)
        target.open(&block)
      else
        File.open(target.to_s, 'r', &block)
      end
    end
    private_class_method :open_stream
  end
end
data/lib/feedbooks.rb ADDED
@@ -0,0 +1,128 @@
1
+ require 'app.rb'
2
+ require 'hpricot'
3
+ require 'digest/md5'
4
+
5
+ module HTML2FB
6
+
7
# Credentials and target book for one publishing run. Only one session may
# exist; it is reachable through FBSession.session.
class FBSession

  attr_accessor :bookid
  attr_accessor :booktype
  attr_accessor :user
  attr_accessor :host
  attr_reader :pass
  @@fbsession=nil

  # Reads the 'fb' section of +conf+, prompting (via ask) for every value
  # that is missing or set to '#ask#'.
  # Raises StandardError when a session already exists.
  def initialize(conf)
    raise StandardError, 'Already in session' unless @@fbsession.nil?
    fb=conf['fb'] || {}
    self.bookid=ask(fb['bookid'],"Book Id")
    self.booktype=ask(fb['booktype'],"Book Type")
    self.user=ask(fb['user'],"User")
    self.pass=ask(fb['pass'],"Pass")
    self.host=fb['host']
    self.host='feedbooks.com' if @host.nil?
    # Registered last so a failed prompt does not leave a half-built session.
    @@fbsession=self
  end

  # The session created by the last successful FBSession.new, or nil.
  def self.session
    return @@fbsession
  end

  # Stores the password as an MD5 hex digest. A value that already is a
  # 32-character lowercase hex string is kept as given; anything else is hashed.
  def pass=(pas)
    if pas =~ /\A[0-9a-f]{32}\z/
      @pass=pas
    else
      @pass=Digest::MD5.hexdigest(pas)
    end
  end
end
39
+
40
+
41
# Feedbooks publishing entry point for a parsed document.
class Document
  # Opens the Feedbooks session from +conf+, then publishes every top-level
  # child with no parent path.
  def to_feedbooks(conf)
    FBSession.new(conf)
    content.each { |child| child.to_feedbooks(conf, nil) }
  end
end
53
+
54
# Sends one entry to Feedbooks using the current FBSession.
class FBPost
  # Posts an entry titled +tit+ with body +cont+ and category +type+.
  # Without +path+ the entry goes to the book's contents.atom collection,
  # otherwise to +path+ on the session host. Returns what AtomPost#down_url
  # gives for the created entry, or nil when the post returned no location.
  def self.push(conf,tit,cont,type,path=nil)
    puts "Sending to feedbooks #{tit} with type #{type}"
    session = FBSession.session
    target = if path.nil?
               "http://#{session.host}/#{session.booktype}/#{session.bookid}/contents.atom"
             else
               "http://#{session.host}#{path}"
             end

    entry = AtomPost.new(target)
    entry.title = tit
    entry.content = cont
    entry.type = type
    entry.author = session.user
    entry.user = session.user
    entry.pass = session.pass
    entry.date = Time.now

    location = entry.send
    location.nil? ? nil : entry.down_url(location)
  end
end
75
+
76
# Feedbooks extensions of Section: publishing and heading-decorated HTML.
class Section
  # Current nesting depth, shared by every Section while a tree is walked.
  @@level=0
  @@types=['Part','Chapter','Section']

  # Publishes this section under +path+, then its children under the path
  # returned for it. The entry type is the normalised fblevel when that is
  # one of @@types, otherwise it is picked from the current nesting depth.
  def to_feedbooks(conf,path=nil)
    type=self.fblevel.to_s.downcase.strip.capitalize
    type=@@types[@@level]||@@types[-1] unless @@types.include?(type)
    fbpath=FBPost.push(conf,title,'',type,path)
    @@level+=1
    begin
      content.each { |e| e.to_feedbooks(conf,fbpath) }
    ensure
      # Restore the depth even when a child fails, so later calls stay sane.
      @@level-=1
    end
  end

  alias :old_to_html :to_html

  # HTML of the section: its title in an <hN> heading (N follows the nesting
  # depth) followed by the children's HTML. A nil title renders as an empty
  # heading instead of raising.
  def to_html
    heading="<h#{@@level+1}>#{title}</h#{@@level+1}>"
    @@level+=1
    begin
      heading+old_to_html
    ensure
      @@level-=1
    end
  end
end
103
+
104
# Feedbooks extension of Text.
class Text
  # Publishes this chunk as a "Text" entry under +path+. Whitespace-only
  # chunks are skipped. The markup is wrapped in a div and every non-text
  # node gets its tag name prefixed with "xhtml:" before sending.
  def to_feedbooks(conf,path=nil)
    markup = to_html
    return if markup.strip.size <= 0
    wrapped = Hpricot('<div xmlns:xhtml="http://www.w3.org/1999/xhtml">' + markup + '</div>')
    wrapped.traverse_all_element do |node|
      next if node.is_a?(Hpricot::Text)
      node.name = 'xhtml:' + node.name
      # Closing tag is renamed too when the node exposes one.
      next if !node.respond_to?(:etag) || node.etag.nil?
      node.etag = 'xhtml:' + node.etag
    end
    FBPost.push(conf, '', wrapped.to_html, "Text", path)
  end
end
118
+ end
119
+
120
# Returns +txt+ when it holds a real value; when it is nil or the '#ask#'
# placeholder, prompts on STDOUT with +disp+ and reads STDIN until a
# non-empty line is entered. Raises EOFError when STDIN is exhausted.
def ask(txt,disp='Prompt')
  return txt unless txt.nil? || txt =='#ask#'
  answer = ''
  while answer.size == 0
    print disp+' : '
    # Make sure the prompt is visible before blocking on the read.
    STDOUT.flush
    answer = STDIN.readline.strip
  end
  answer
end
data/lib/parser.rb ADDED
@@ -0,0 +1,335 @@
1
+ require 'hpricot'
2
+ require 'document.rb'
3
+ require 'progressbar'
4
+ #require 'ruby-prof'
5
+ #require 'term/ansicolor'
6
+ #include Term::ANSIColor
7
+
8
+ module HTML2FB
9
# Turns raw HTML into a Document tree according to the configuration:
# optional charset conversion, removal of unwanted nodes, then sectioning
# driven by the 'select' rules.
class Parser

  def initialize(conf)
    @conf=conf
  end

  # Parses +txt+ (HTML source) and returns the resulting Document.
  def parse(txt)
    puts "Parsing HTML"
    pdoc=Hpricot(txt)
    if @conf['conv']
      tc=declared_charset(pdoc)
      unless tc.nil?
        puts "Trying to convert source encoding from #{tc} to utf-8"
        pdoc=Hpricot(to_utf8(txt,tc))
      end
    end
    doc=Document.new
    puts "Removing garbage elements"
    remove_objs(pdoc)
    ti=pdoc.at('title')
    doc.title= ti.extract_text.strip unless ti.nil?

    puts "Building TOC"
    parse_text(pdoc,doc)

    return doc
  end

  protected

  # Charset named by the first Content-Type meta tag, or nil when there is
  # no such tag, no content attribute or no charset part.
  def declared_charset(pdoc)
    metas=pdoc/'meta[@http-equiv="Content-Type"]'
    return nil unless metas.size>0
    content=metas.first.attributes['content']
    return nil if content.nil?
    part=content.split(';').find { |s| s.strip[0,7]=='charset' }
    part.nil? ? nil : part.split('=').last.strip
  end

  # Converts +txt+ from +charset+ to utf-8. Uses String#encode where strings
  # carry an encoding and falls back to Iconv on interpreters that lack it
  # (Iconv is no longer part of the standard library).
  def to_utf8(txt,charset)
    if txt.respond_to?(:encode)
      txt.dup.force_encoding(charset).encode('utf-8')
    else
      require 'iconv'
      Iconv.conv('utf-8',charset.downcase,txt)
    end
  end

  # Applies the 'remove' rules of the configuration to +doc+ in this order:
  # class, expr, before, between, after. Missing rule lists are skipped.
  def remove_objs(doc)
    rules=@conf['remove']
    return unless rules
    (rules['class'] || []).each { |cl| doc.search('.'+cl).remove }
    (rules['expr'] || []).each { |cl| doc.search(cl).remove }
    (rules['before'] || []).each do |cl|
      x=doc.at(cl)
      next unless x
      x.preceding.remove
      x.parent.children.delete(x)
    end
    (rules['between'] || []).each do |cl|
      t=doc.between(cl.first,cl.last)
      t.remove unless t.nil?
    end
    (rules['after'] || []).each do |cl|
      x=doc.at(cl)
      next unless x
      x.following.remove
      x.parent.children.delete(x)
    end
  end

  # Feeds every node of +doc+ to the state machine built from the 'select'
  # rules and lets it fill +ret+; shows a progress bar while doing so.
  def parse_text(doc,ret)
    aut=build_autom(@conf['select'],ret)

    pbar = ProgressBar.new("Parsing", doc.search('//').size)
    doc.traverse_all_element do |el|
      aut.feed(el)
      pbar.inc
    end
    pbar.finish
    aut.finish(doc)
  end

  # Builds a StateMachine with one level per nesting depth of +conf_tab+ and
  # resets it on +doc+.
  def build_autom(conf_tab,doc)
    mach=StateMachine.new
    build_rec(mach,conf_tab)
    mach.reset(doc)
    mach
  end

  # Adds the rules of the current depth as one level, then recurses on the
  # nested 'select' lists of all of them. A missing or empty list ends the
  # recursion.
  def build_rec(mach,conf_tab)
    return if conf_tab.nil? || conf_tab.size < 1
    exprs=conf_tab.collect{|e| e.reject{|k,v| k=='select'} }
    mach.add_level(exprs)
    build_rec(mach,conf_tab.collect{|e| e['select'] }.flatten.reject{|a|a.nil?})
  end
end
127
+
128
# Walks the document nodes in order and groups them into nested Sections.
# @levels holds the selection rules per depth, @starts the section currently
# open at each depth (index 0 is the document itself) and @content the xpath
# of the last heading seen ('body' before the first one).
class StateMachine

  def initialize
    @levels=[]
    @current_level=0
    @starts=[]
    @done=[]
    @max_level=0
    @content=nil
  end

  # Appends one depth of rules; a single rule is wrapped in an array.
  def add_level(tab)
    tab=[tab] unless tab.is_a?Array
    @levels.push tab
    @current_level+=1
  end

  # Prepares a run that fills +doc+.
  def reset(doc)
    @current_level=0
    @max_level=@levels.size
    @starts[0]=doc
    @content='body'
  end

  def inspect
    @levels.inspect+"\n"+@current_level.to_s+"\n\n"+@done.inspect
  end

  def create_fbsection(title,fblevel)
    s=Section.new
    s.fblevel=fblevel
    s.title = title
    s
  end

  def create_textNode(txt)
    Text.new(txt)
  end

  # Flushes what follows the last heading into the open section, closes every
  # open section and returns the root. When the last heading can no longer be
  # located in +doc+ the trailing text is skipped instead of raising.
  def finish(doc)
    unless @content.nil?
      last=doc.at(@content)
      unless last.nil?
        t=create_textNode(last.following.to_html)
        @starts[@current_level].content.push(t)
      end
    end
    (1..@max_level).to_a.reverse.each do |l|
      close_section(l)
    end
    @starts[0]
  end

  # Called when +el+ matches a rule of depth +lvl+: stores the markup found
  # between the previous heading and +el+ in the section open so far, closes
  # the sections of depth +lvl+ and deeper, then opens a new one.
  def open_section(obj,lvl,el)
    if @content=='body'
      tmp=el.preceding[0..-1]
    else
      tmp=el.root.search(@content...(el.xpath))[1..-1]
    end
    if tmp.blank? # the range search finds nothing between non-siblings
      tmp=el.root.deep_between(@content,(el.xpath))
    end
    unless tmp.blank?
      tmph=tmp.to_html
      unless tmph.blank?
        t=create_textNode(tmph)
        @starts[@current_level].content.push(t)
      end
    end
    (lvl..@max_level).to_a.reverse.each do |l|
      close_section(l)
    end
    @starts[lvl]=create_fbsection(el.root.at(obj[:xpath]).extract_text,obj[:fblevel])
    @content=obj[:xpath]
    @current_level=lvl
  end

  # Attaches the section open at +lvl+ to the nearest open ancestor.
  def close_section(lvl)
    return if @starts[lvl].nil?
    llvl=lvl-1
    # @starts[0] is set by reset, so this walk always stops.
    llvl=llvl-1 until !@starts[llvl].nil?
    @starts[llvl].content.push @starts[lvl]
    @starts[lvl]=nil
  end

  # Tests +el+ against the rules of every depth; within a depth only the
  # first matching rule opens a section. Text nodes are ignored.
  def feed(el)
    return if el.is_a?Hpricot::Text
    # One empty slot per level (only read by #inspect).
    @done=Array.new(@levels.size) { [] }

    @levels.each_with_index do |lvl,i|
      lvl.each do |expr|
        if el.in_search?(expr['expr'])
          open_section({:xpath => el.xpath, :fblevel => expr['fblevel']},i+1,el)
          break
        end
      end
    end
  end
end
229
+ end
230
+
231
+
232
class String
  # True when the string holds no non-whitespace character.
  def blank?
    (self =~ /\S/).nil?
  end
end
237
+
238
class NilClass
  # nil always counts as blank.
  def blank?
    nil?
  end
end
243
+
244
# Helpers added to Hpricot nodes for rule matching and for collecting the
# nodes that lie between two headings.
module Hpricot::Traverse
  # True when this node is selected by +expr+. A bare tag name is compared
  # with the node name directly; any other expression is searched from the
  # document root when it starts with '/', from the parent otherwise.
  def in_search?(expr)
    if expr !~ /[^a-z0-9]/
      return self.name.downcase()==expr.downcase()
    end

    se_in=self.parent
    # expr[0,1] is the first character (a two-character slice never equals '/').
    if expr[0,1]=='/'
      se_in=self.root
    end
    se_in.search(expr).each do |el|
      return true if el==self
    end
    return false
  end

  # Topmost ancestor of this node; the result is remembered per node.
  def root
    return @root unless @root.nil?
    se_in=self
    se_in=se_in.parent until se_in.parent.nil?
    @root=se_in
    se_in
  end

  def between(a,b)
    root.search(a..b)
  end

  # Concatenated content of every text node below this node.
  def extract_text
    t=''
    self.traverse_all_element do |e|
      t << e.content.to_s if e.is_a?(Hpricot::Text)
    end
    t
  end

  # Nodes located after the node at xpath +i+ and before the node at xpath
  # +j+, also when the two do not share a parent. Without a usable +j+,
  # everything after +i+ is returned. Returns an empty set when +i+ cannot
  # be found.
  def deep_between(i,j)
    tm=self.at(i)
    return Hpricot::Elements[] if tm.nil?
    stop=j.nil? ? nil : self.at(j)
    if stop.nil?
      r=tm.deep_following
    else
      prec=tm.deep_preceding
      # Parentheses matter: without them the start node was never excluded.
      r=Hpricot::Elements[*stop.deep_preceding.find_all{|el| !(prec.include?(el) || el==tm)}]
    end
    Hpricot::Elements[*select_end(r,i)]
  end

  # Trims +tab+ so it starts after the node at xpath +expr+: when an entry
  # contains that node, only what follows it inside the entry is kept (found
  # recursively), followed by the entries after it. When no entry contains
  # the node, +tab+ is returned whole.
  def select_end(tab,expr)
    s=[]
    idx=-1
    tab.each_with_index do |e,i|
      next unless e.search(expr.gsub(e.xpath,'.')).size > 0
      idx=i
      if e.children.find{|ee| ee.xpath==expr }
        f=false
        e.children.each do |ee|
          s << ee if f
          f=true if ee.xpath==expr
        end
      else
        s=select_end(e.children,expr)
      end
      break
    end
    return s+tab[(idx+1)..-1]
  end

  # Preceding siblings of this node and of each of its ancestors, outermost
  # ancestors first.
  def deep_preceding()
    ret=Hpricot::Elements[]
    ret+=parent.deep_preceding if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc )
    ret+=preceding
    Hpricot::Elements[*ret]
  end

  # Following siblings of this node, then those of each of its ancestors.
  def deep_following()
    ret=following
    ret+=parent.deep_following if respond_to?(:parent) && !parent.is_a?(Hpricot::Doc )
    Hpricot::Elements[*ret]
  end

end
332
+
333
class Hpricot::Elements
  # An element set is blank when it is empty.
  alias blank? empty?
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Html2Feedbooks
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.7
5
+ platform: ruby
6
+ authors:
7
+ - Benoit Larroque
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-04-28 00:00:00 +02:00
13
+ default_executable: html2fb.rb
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "4.0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: launchy
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0.3"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: progressbar
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.0.3
54
+ version:
55
+ description: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
56
+ email: zeta dot ben at gmail dot com
57
+ executables:
58
+ - html2fb.rb
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - README
65
+ - confs/conf.yaml
66
+ - lib/app.rb
67
+ - lib/conf.rb
68
+ - lib/document.rb
69
+ - lib/downloader.rb
70
+ - lib/feedbooks.rb
71
+ - bin/html2fb.rb
72
+ - lib/parser.rb
73
+ has_rdoc: true
74
+ homepage: http://github.com/Html2Feedbooks
75
+ licenses: []
76
+
77
+ post_install_message:
78
+ rdoc_options: []
79
+
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: "0"
93
+ version:
94
+ requirements: []
95
+
96
+ rubyforge_project:
97
+ rubygems_version: 1.3.5
98
+ signing_key:
99
+ specification_version: 3
100
+ summary: Html2Feedbooks is a script to automate basic publishing on feedbooks.com
101
+ test_files: []
102
+