colread 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/colread +28 -0
  3. data/lib/colread.rb +49 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 20aa61c77b99f49fd87290fe5061307ebd517d83
4
+ data.tar.gz: 103f1857d93809b17a6f2d850033555c3061bc41
5
+ SHA512:
6
+ metadata.gz: ea3687991d2e945d896e8069ad7a85df8e7e0f68bd50662139f7ea073bcfc7c28387b88c3121e7a98f54a9b857d35b2653af6690d1947aa7b305ae37fd8721b4
7
+ data.tar.gz: b070e16493c92b5b90f24c4013561fe85aaa442c7974e883ae3bd29ed13facf835f00a5b5cb9d83531803c627466faddcb5bc6b495c416b7e84d02611bed0095
data/bin/colread ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'colread'
4
+ require 'rubygems'
5
+ require 'optparse'
6
+ #require File.dirname(__FILE__) + '/../lib/readability'
7
+
# Command-line entry point for colread.
#
# Parses the flags, checks that exactly two positional arguments remain
# (the index page URL and the output file name), then runs the download.
options = { :debug => false, :images => false }
options_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"

  opts.on("-d", "--debug", "Show debug output") do |v|
    options[:debug] = v
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# An unknown flag or a malformed argument raises OptionParser::ParseError;
# report it together with the usage text instead of dying with a backtrace.
begin
  options_parser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts options_parser
  exit 1
end

# parse! has removed the flags from ARGV; URL and FileName must be all that is left.
unless ARGV.length == 2
  STDERR.puts options_parser
  exit 1
end

# NOTE(review): `options` (:debug, :images) is collected above but is not
# passed on here -- only the URL and the output name reach ColRead::Text.
ColRead::Text.new(url: ARGV.first, output: ARGV.last).start
data/lib/colread.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'readability'
4
+
# Reopens Nokogiri's element class to add a helper used for grouping links.
module Nokogiri
  module XML
    class Element
      # Builds a "shape" key for this element's link target by replacing every
      # run of digits in the +href+ attribute with the letter +x+, so that
      # links differing only in their numbers produce the same key
      # (e.g. "/book/12/345.html" and "/book/12/346.html" both give
      # "/book/x/x.html").
      #
      # @return [String, nil] the normalised href, or nil when the element has
      #   no +href+ attribute
      def indent
        href = self['href']
        # Explicit nil guard instead of a blanket `rescue nil`, so unrelated
        # errors are no longer silently swallowed.
        href.gsub(/\d+/, 'x') if href
      end
    end
  end
end
14
+
# Downloads the chapters linked from an index page and extracts their text.
module ColRead
  # Scraping helpers shared by the output classes.
  #
  # The including object is expected to set @url (the index page URL) and
  # @root (scheme and host of that URL); both are used to resolve links.
  module Core
    # Fetches +url+ and returns the largest group of links that share the
    # same href "shape" (see Nokogiri::XML::Element#indent, defined in this
    # file, which masks the digits of an href).
    #
    # @param url [String] URL of the index page
    # @return [Array<Nokogiri::XML::Element>] the anchors of the biggest
    #   group, or an empty array when the page has no anchors with an href
    def chapters(url)
      # URI#open (from open-uri) only opens URLs; Kernel#open would also
      # accept "|command" strings and no longer opens URLs on Ruby 3.
      doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }

      groups = doc.css('a').group_by { |a| a.indent }
      # Anchors without an href all share the nil key; they are not links.
      groups.delete(nil)

      groups.values.max_by { |links| links.count } || []
    end

    # Downloads every chapter and yields its title and extracted text.
    #
    # @param chapters [Array<Nokogiri::XML::Element>] anchors, as returned by
    #   #chapters
    # @yield [pair] a two-element array: the link text and the page text
    # @return [Array, Enumerator] +chapters+ itself, or an enumerator when no
    #   block is given
    def contents(chapters)
      return enum_for(:contents, chapters) unless block_given?

      chapters.each do |a|
        source = read_url(absolute_url(a['href'])).encode('utf-8')
        content = Nokogiri::HTML(Readability::Document.new(source).content).text
        yield [a.text, content]
      end
    end

    private

    # Turns a link target into a URL that can be fetched.
    #
    # Targets starting with "http" are returned untouched. Anything else is
    # resolved against the index page URL, so both "/path" and "page.html"
    # style links work. If that resolution fails the previous behaviour
    # (@root + href) is used.
    def absolute_url(href)
      return href if href.start_with?('http')

      URI.join(@url.to_s, href).to_s
    rescue URI::Error
      @root + href
    end

    # Reads the body of +url+ and closes the connection afterwards.
    def read_url(url)
      URI.parse(url).open { |io| io.read }
    end
  end

  # Writes all chapters of an index page into a single text file.
  class Text
    include ColRead::Core

    # @param options [Hash] :url is the index page URL, :output is the path
    #   of the file to write
    def initialize(options = { url: '', output: '' })
      @url = options[:url]
      @output = options[:output]
      # Split on single slashes only (not the "//" after the scheme), so the
      # first piece is "scheme://host".
      @root = @url.split(/(?<!\/)\/(?!\/)/).first
    end

    # Downloads every chapter, printing each title to stdout and writing the
    # title followed by the chapter text to the output file.
    #
    # @return [nil]
    def start
      # Block form closes the file even when a download raises midway.
      File.open(@output, 'w') do |f|
        contents(chapters(@url)) do |c|
          puts c.first
          f.puts c.first
          f.puts c.last
        end
      end
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: colread
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - yunjie chen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: download a list of text on a page
14
+ email: kikyous@163.com
15
+ executables:
16
+ - colread
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/colread.rb
21
+ - bin/colread
22
+ homepage: http://rubygems.org/gems/colread
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.0.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: colread!
45
+ test_files: []