zetaben-Html2Feedbooks 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/bin/html2fb.rb +5 -1
  2. data/lib/conf.rb +2 -1
  3. data/lib/parser.rb +19 -0
  4. metadata +1 -1
data/bin/html2fb.rb CHANGED
@@ -14,6 +14,7 @@ include HTML2FB
14
14
  options = {}
15
15
  options[:conf] = "conf.yaml"
16
16
  options[:preview] = true
17
+ options[:conv] = true
17
18
  OptionParser.new do |opts|
18
19
  opts.banner = "Usage: html2fb [options] URL"
19
20
 
@@ -23,6 +24,9 @@ OptionParser.new do |opts|
23
24
  opts.on("-s", "-s","Send to feedbooks") do |f|
24
25
  options[:preview] = !f
25
26
  end
27
+ opts.on("-nc", "--no-conv","No charset conversion") do |f|
28
+ options[:conv] = !f
29
+ end
26
30
  end.parse!
27
31
 
28
32
  valid=false
@@ -40,7 +44,7 @@ while !valid
40
44
  print "URL : " if entry.nil? || entry==''
41
45
  entry=STDIN.readline.strip unless valid
42
46
  end
43
- conf=Conf.new(options[:conf])
47
+ conf=Conf.new(options[:conf],options[:conv])
44
48
  content=Downloader.download(url)
45
49
  #puts content.size
46
50
  doc=Parser.new(conf).parse(content)
data/lib/conf.rb CHANGED
@@ -2,13 +2,14 @@ require 'yaml'
2
2
 
3
3
  module HTML2FB
4
4
  class Conf
5
- def initialize(file)
5
+ def initialize(file,conv)
6
6
  ['','./',"#{File.dirname(__FILE__)}/","#{File.dirname(__FILE__)}/../confs/"].each do |p|
7
7
  f=p+file
8
8
  begin
9
9
  if File.readable?(f) && File.exists?(f)
10
10
  @conf=File.open(f,'r'){|txt| YAML::load(txt)}
11
11
  puts "loaded config file : "+f
12
+ @conf['conv']=conv
12
13
  return
13
14
  end
14
15
  rescue Exception => e
data/lib/parser.rb CHANGED
@@ -15,6 +15,25 @@ module HTML2FB
15
15
  def parse(txt)
16
16
  puts "Parsing HTML"
17
17
  pdoc=Hpricot(txt)
18
+ if @conf['conv']
19
+ mc=pdoc/'meta[@http-equiv="Content-Type"]'
20
+ if mc.size>0
21
+ charset=mc.first.attributes['content'].split(';').find do |s|
22
+ s.strip[0,7]=='charset'
23
+ end
24
+ unless charset.nil?
25
+ tc=charset.split('=').last.strip
26
+ end
27
+
28
+ unless tc.nil?
29
+ puts "Trying to convert source encoding from #{tc} to utf-8"
30
+ require 'iconv'
31
+ pdoc=Hpricot(Iconv.conv('utf-8',tc.downcase,txt))
32
+
33
+ end
34
+
35
+ end
36
+ end
18
37
  doc=Document.new
19
38
  puts "Removing garbage elements"
20
39
  remove_objs(pdoc)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zetaben-Html2Feedbooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benoit Larroque