colread 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/colread +7 -8
  3. data/lib/colread.rb +11 -4
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a5110d542174b574c2684dcfbeca7cad9fc7ecc6
4
- data.tar.gz: 1d2ffdab33b442c885ff8150c3256b08c1dd0f35
3
+ metadata.gz: eaa58fe8f40e78e6e9e838c227c863bea2fe349f
4
+ data.tar.gz: 6d25ee9b43562df83027b0beaf301f99f9643b26
5
5
  SHA512:
6
- metadata.gz: 41f05aff1a11775039a4cd4baea4aee38af87ca5ca71ff95f8b04deb773bfeec6e1396659e4066be2fae0ccebc74c66d2e4ec1a849a44830780a1a1ed0e6dabf
7
- data.tar.gz: 80f1e2405b4c100464758550fe5d82105024efcdb9dd0e20e2ab0a9d66f6c8b875ee3fe31175983145cf7bdfa40ef571a0f233d31468fed35689c8b64aae9a3b
6
+ metadata.gz: e97b37fac6351f7400ed4e0d267f97ba556d0459c407e55ef68e01f55da38928cc44a5b6bcdb3a01d133c38c2b441999664ea786272cec524df566485cc432ab
7
+ data.tar.gz: 19ebb47c9bdf28e017d880de748123026ffaf71cdb8d068794e0a2792db7e2e3963c0ac93bfccec5a8e811f62e828f45360edf603ec4555fd70da800e748d92d
data/bin/colread CHANGED
@@ -1,16 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'colread'
3
+ require File.dirname(__FILE__) + '/../lib/colread'
4
4
  require 'rubygems'
5
5
  require 'optparse'
6
- #require File.dirname(__FILE__) + '/../lib/readability'
7
6
 
8
- options = { :debug => false, :images => false }
7
+ options = { :encode => 'GBK' }
9
8
  options_parser = OptionParser.new do |opts|
10
- opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"
9
+ opts.banner = "Usage: #{File.basename($0)} [options] Url FileName"
11
10
 
12
- opts.on("-d", "--debug", "Show debug output") do |v|
13
- options[:debug] = v
11
+ opts.on("-e encode", "--encode=encode", "page encode") do |e|
12
+ options[:encode] = e
14
13
  end
15
14
 
16
15
  opts.on_tail("-h", "--help", "Show this message") do
@@ -20,9 +19,9 @@ options_parser = OptionParser.new do |opts|
20
19
  end
21
20
  options_parser.parse!
22
21
 
23
- if ARGV.length != 2
22
+ if ARGV.length < 2
24
23
  STDERR.puts options_parser
25
24
  exit 1
26
25
  end
27
26
 
28
- ColRead::Text.new(url: ARGV.first, output: ARGV.last).start
27
+ ColRead::Text.new(url: ARGV.first, output: ARGV.last, encode: options[:encode]).start
data/lib/colread.rb CHANGED
@@ -15,8 +15,8 @@ end
15
15
  module ColRead
16
16
  module Core
17
17
  def chapters url
18
- doc = Nokogiri::HTML(open(url))
19
- doc.css('a').group_by{|a| a.indent}.sort_by{|a| a.last.count}.last.last
18
+ doc = Nokogiri::HTML(_open(url))
19
+ doc.css('a').select{|a| a.text =~ /\S/ }.group_by{|a| a.indent}.sort_by{|a| a.last.count}.last.last
20
20
  end
21
21
 
22
22
  def ahref a
@@ -31,18 +31,25 @@ module ColRead
31
31
 
32
32
  def contents chapters
33
33
  chapters.each do |a|
34
- source = open(ahref(a)).read.encode('utf-8')
34
+ source = _open(ahref(a))
35
35
  content=Nokogiri::HTML(Readability::Document.new(source).content).text
36
36
  yield [a.text, content]
37
37
  end
38
38
  end
39
+
40
+ def _open url
41
+ source = open(url).read
42
+ source.force_encoding(@encode)
43
+ source.encode!("utf-8", :undef => :replace, :replace => "?", :invalid => :replace)
44
+ end
39
45
  end
40
46
 
41
47
  class Text
42
48
  include ColRead::Core
43
- def initialize(options={url: '',output: ''})
49
+ def initialize(options={url: '',output: '', encode: ''})
44
50
  @url=options[:url]
45
51
  @output=options[:output]
52
+ @encode=options[:encode]
46
53
  @root=@url.split(/(?<!\/)\/(?!\/)/).first
47
54
  end
48
55
  def start
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: colread
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yunjie chen