colread 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/colread +7 -8
  3. data/lib/colread.rb +11 -4
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a5110d542174b574c2684dcfbeca7cad9fc7ecc6
4
- data.tar.gz: 1d2ffdab33b442c885ff8150c3256b08c1dd0f35
3
+ metadata.gz: eaa58fe8f40e78e6e9e838c227c863bea2fe349f
4
+ data.tar.gz: 6d25ee9b43562df83027b0beaf301f99f9643b26
5
5
  SHA512:
6
- metadata.gz: 41f05aff1a11775039a4cd4baea4aee38af87ca5ca71ff95f8b04deb773bfeec6e1396659e4066be2fae0ccebc74c66d2e4ec1a849a44830780a1a1ed0e6dabf
7
- data.tar.gz: 80f1e2405b4c100464758550fe5d82105024efcdb9dd0e20e2ab0a9d66f6c8b875ee3fe31175983145cf7bdfa40ef571a0f233d31468fed35689c8b64aae9a3b
6
+ metadata.gz: e97b37fac6351f7400ed4e0d267f97ba556d0459c407e55ef68e01f55da38928cc44a5b6bcdb3a01d133c38c2b441999664ea786272cec524df566485cc432ab
7
+ data.tar.gz: 19ebb47c9bdf28e017d880de748123026ffaf71cdb8d068794e0a2792db7e2e3963c0ac93bfccec5a8e811f62e828f45360edf603ec4555fd70da800e748d92d
data/bin/colread CHANGED
@@ -1,16 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'colread'
3
+ require File.dirname(__FILE__) + '/../lib/colread'
4
4
  require 'rubygems'
5
5
  require 'optparse'
6
- #require File.dirname(__FILE__) + '/../lib/readability'
7
6
 
8
- options = { :debug => false, :images => false }
7
+ options = { :encode => 'GBK' }
9
8
  options_parser = OptionParser.new do |opts|
10
- opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"
9
+ opts.banner = "Usage: #{File.basename($0)} [options] Url FileName"
11
10
 
12
- opts.on("-d", "--debug", "Show debug output") do |v|
13
- options[:debug] = v
11
+ opts.on("-e encode", "--encode=encode", "page encode") do |e|
12
+ options[:encode] = e
14
13
  end
15
14
 
16
15
  opts.on_tail("-h", "--help", "Show this message") do
@@ -20,9 +19,9 @@ options_parser = OptionParser.new do |opts|
20
19
  end
21
20
  options_parser.parse!
22
21
 
23
- if ARGV.length != 2
22
+ if ARGV.length < 2
24
23
  STDERR.puts options_parser
25
24
  exit 1
26
25
  end
27
26
 
28
- ColRead::Text.new(url: ARGV.first, output: ARGV.last).start
27
+ ColRead::Text.new(url: ARGV.first, output: ARGV.last, encode: options[:encode]).start
data/lib/colread.rb CHANGED
@@ -15,8 +15,8 @@ end
15
15
  module ColRead
16
16
  module Core
17
17
  def chapters url
18
- doc = Nokogiri::HTML(open(url))
19
- doc.css('a').group_by{|a| a.indent}.sort_by{|a| a.last.count}.last.last
18
+ doc = Nokogiri::HTML(_open(url))
19
+ doc.css('a').select{|a| a.text =~ /\S/ }.group_by{|a| a.indent}.sort_by{|a| a.last.count}.last.last
20
20
  end
21
21
 
22
22
  def ahref a
@@ -31,18 +31,25 @@ module ColRead
31
31
 
32
32
  def contents chapters
33
33
  chapters.each do |a|
34
- source = open(ahref(a)).read.encode('utf-8')
34
+ source = _open(ahref(a))
35
35
  content=Nokogiri::HTML(Readability::Document.new(source).content).text
36
36
  yield [a.text, content]
37
37
  end
38
38
  end
39
+
40
+ def _open url
41
+ source = open(url).read
42
+ source.force_encoding(@encode)
43
+ source.encode!("utf-8", :undef => :replace, :replace => "?", :invalid => :replace)
44
+ end
39
45
  end
40
46
 
41
47
  class Text
42
48
  include ColRead::Core
43
- def initialize(options={url: '',output: ''})
49
+ def initialize(options={url: '',output: '', encode: ''})
44
50
  @url=options[:url]
45
51
  @output=options[:output]
52
+ @encode=options[:encode]
46
53
  @root=@url.split(/(?<!\/)\/(?!\/)/).first
47
54
  end
48
55
  def start
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: colread
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yunjie chen