colread 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/colread +28 -0
  3. data/lib/colread.rb +49 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 20aa61c77b99f49fd87290fe5061307ebd517d83
4
+ data.tar.gz: 103f1857d93809b17a6f2d850033555c3061bc41
5
+ SHA512:
6
+ metadata.gz: ea3687991d2e945d896e8069ad7a85df8e7e0f68bd50662139f7ea073bcfc7c28387b88c3121e7a98f54a9b857d35b2653af6690d1947aa7b305ae37fd8721b4
7
+ data.tar.gz: b070e16493c92b5b90f24c4013561fe85aaa442c7974e883ae3bd29ed13facf835f00a5b5cb9d83531803c627466faddcb5bc6b495c416b7e84d02611bed0095
data/bin/colread ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'colread'
4
+ require 'rubygems'
5
+ require 'optparse'
6
+ #require File.dirname(__FILE__) + '/../lib/readability'
7
+
# Command-line entry point: parses the flags, checks that exactly two
# positional arguments (URL and output file name) remain, then hands them to
# ColRead::Text.
#
# NOTE(review): options[:debug] and options[:images] are never read in this
# script; the -d flag is accepted but currently has no visible effect.
options = { :debug => false, :images => false }
options_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"

  opts.on("-d", "--debug", "Show debug output") do |v|
    options[:debug] = v
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  options_parser.parse!
rescue OptionParser::ParseError => e
  # An unknown or malformed flag is a usage error: report it and show the
  # usage text instead of letting the exception escape with a backtrace.
  STDERR.puts e.message
  STDERR.puts options_parser
  exit 1
end

# parse! removes the recognised flags from ARGV, so only URL and FileName
# should be left at this point.
if ARGV.length != 2
  STDERR.puts options_parser
  exit 1
end

ColRead::Text.new(url: ARGV.first, output: ARGV.last).start
data/lib/colread.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'readability'
4
+
module Nokogiri
  module XML
    # Reopened to add a helper that lets callers bucket links by the "shape"
    # of their URL.
    class Element
      # Returns this element's +href+ attribute with every run of digits
      # replaced by the letter "x", e.g. "/book/12/345.html" becomes
      # "/book/x/x.html". Links that differ only in their numeric parts
      # therefore map to the same string.
      #
      # @return [String, nil] the normalized href, or nil when the element
      #   has no href attribute
      def indent
        href = self['href']
        # Explicit nil check instead of a blanket `rescue nil`, which would
        # also silence unrelated errors.
        href && href.gsub(/\d+/, 'x')
      end
    end
  end
end
14
+
# Downloads the pages linked from an index page and writes their extracted
# text into a single output file.
module ColRead
  # Scraping helpers shared by the readers. An includer must set @url (the
  # index page address) or, at minimum, @root (scheme and host) before
  # calling #contents, because relative links are resolved against them.
  module Core
    # Picks the chapter links out of an index page.
    #
    # Anchors are grouped by Nokogiri::XML::Element#indent (the href with
    # digit runs replaced by "x") and the largest group is returned.
    #
    # @param url [String] address of the index page
    # @return [Array<Nokogiri::XML::Element>] anchors of the largest group;
    #   empty when the page has no anchor with a usable href
    def chapters(url)
      doc = Nokogiri::HTML(URI.open(url, &:read))
      groups = doc.css('a').group_by { |a| a.indent }
      # Anchors whose #indent is nil have no href to fetch, so they can never
      # be the chapter list.
      groups.delete(nil)
      groups.values.max_by(&:size) || []
    end

    # Fetches every chapter and yields its title together with the readable
    # text extracted from the page.
    #
    # @param chapters [Enumerable] anchor elements as returned by #chapters
    # @yieldparam pair [Array(String, String)] link text and page text
    # @return [Enumerable, Enumerator] +chapters+ itself, or an enumerator
    #   when no block is given
    def contents(chapters)
      return enum_for(:contents, chapters) unless block_given?

      chapters.each do |a|
        # Block form closes the HTTP stream as soon as the body is read.
        source = URI.open(absolute_url(a['href']), &:read).encode('utf-8')
        content = Nokogiri::HTML(Readability::Document.new(source).content).text
        yield [a.text, content]
      end
    end

    private

    # Resolves +href+ against the index page (falling back to the site root)
    # so that relative links such as "ch1.html" become fetchable URLs.
    # Absolute hrefs are returned as they are.
    def absolute_url(href)
      URI.join(@url || @root, href).to_s
    end
  end

  # Reader that stores every chapter as plain text in one file.
  class Text
    include ColRead::Core

    # @param options [Hash] :url is the index page address, :output the path
    #   of the file to write
    def initialize(options={url: '',output: ''})
      @url = options[:url]
      @output = options[:output]
      # Everything before the first single "/" (the "//" after the scheme is
      # skipped by the look-arounds), i.e. scheme plus host.
      @root = @url.split(/(?<!\/)\/(?!\/)/).first
    end

    # Downloads all chapters, printing each title to standard output and
    # writing title and text to the output file.
    #
    # @return [nil]
    def start
      # File.open with a block closes the file even when a download fails
      # part-way through.
      File.open(@output, 'w') do |f|
        contents(chapters(@url)) do |title, text|
          puts title
          f.puts title
          f.puts text
        end
      end
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: colread
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - yunjie chen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: download a list of text on a page
14
+ email: kikyous@163.com
15
+ executables:
16
+ - colread
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/colread.rb
21
+ - bin/colread
22
+ homepage: http://rubygems.org/gems/colread
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.0.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: colread!
45
+ test_files: []