zetaben-Html2Feedbooks 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/bin/html2fb.rb +5 -1
  2. data/lib/conf.rb +2 -1
  3. data/lib/parser.rb +19 -0
  4. metadata +1 -1
data/bin/html2fb.rb CHANGED
@@ -14,6 +14,7 @@ include HTML2FB
14
14
  options = {}
15
15
  options[:conf] = "conf.yaml"
16
16
  options[:preview] = true
17
+ options[:conv] = true
17
18
  OptionParser.new do |opts|
18
19
  opts.banner = "Usage: html2fb [options] URL"
19
20
 
@@ -23,6 +24,9 @@ OptionParser.new do |opts|
23
24
  opts.on("-s", "-s","Send to feedbooks") do |f|
24
25
  options[:preview] = !f
25
26
  end
27
+ opts.on("-nc", "--no-conv","No charset conversion") do |f|
28
+ options[:conv] = !f
29
+ end
26
30
  end.parse!
27
31
 
28
32
  valid=false
@@ -40,7 +44,7 @@ while !valid
40
44
  print "URL : " if entry.nil? || entry==''
41
45
  entry=STDIN.readline.strip unless valid
42
46
  end
43
- conf=Conf.new(options[:conf])
47
+ conf=Conf.new(options[:conf],options[:conv])
44
48
  content=Downloader.download(url)
45
49
  #puts content.size
46
50
  doc=Parser.new(conf).parse(content)
data/lib/conf.rb CHANGED
@@ -2,13 +2,14 @@ require 'yaml'
2
2
 
3
3
  module HTML2FB
4
4
  class Conf
5
- def initialize(file)
5
+ def initialize(file,conv)
6
6
  ['','./',"#{File.dirname(__FILE__)}/","#{File.dirname(__FILE__)}/../confs/"].each do |p|
7
7
  f=p+file
8
8
  begin
9
9
  if File.readable?(f) && File.exists?(f)
10
10
  @conf=File.open(f,'r'){|txt| YAML::load(txt)}
11
11
  puts "loaded config file : "+f
12
+ @conf['conv']=conv
12
13
  return
13
14
  end
14
15
  rescue Exception => e
data/lib/parser.rb CHANGED
@@ -15,6 +15,25 @@ module HTML2FB
15
15
  def parse(txt)
16
16
  puts "Parsing HTML"
17
17
  pdoc=Hpricot(txt)
18
+ if @conf['conv']
19
+ mc=pdoc/'meta[@http-equiv="Content-Type"]'
20
+ if mc.size>0
21
+ charset=mc.first.attributes['content'].split(';').find do |s|
22
+ s.strip[0,7]=='charset'
23
+ end
24
+ unless charset.nil?
25
+ tc=charset.split('=').last.strip
26
+ end
27
+
28
+ unless tc.nil?
29
+ puts "Trying to convert source encoding from #{tc} to utf-8"
30
+ require 'iconv'
31
+ pdoc=Hpricot(Iconv.conv('utf-8',tc.downcase,txt))
32
+
33
+ end
34
+
35
+ end
36
+ end
18
37
  doc=Document.new
19
38
  puts "Removing garbage elements"
20
39
  remove_objs(pdoc)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zetaben-Html2Feedbooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benoit Larroque