colread 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/colread +28 -0
- data/lib/colread.rb +49 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 20aa61c77b99f49fd87290fe5061307ebd517d83
|
4
|
+
data.tar.gz: 103f1857d93809b17a6f2d850033555c3061bc41
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ea3687991d2e945d896e8069ad7a85df8e7e0f68bd50662139f7ea073bcfc7c28387b88c3121e7a98f54a9b857d35b2653af6690d1947aa7b305ae37fd8721b4
|
7
|
+
data.tar.gz: b070e16493c92b5b90f24c4013561fe85aaa442c7974e883ae3bd29ed13facf835f00a5b5cb9d83531803c627466faddcb5bc6b495c416b7e84d02611bed0095
|
data/bin/colread
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point for colread.
#
# Usage: colread [options] URL FileName
#
# Parses the command line, then asks ColRead::Text to download the text
# linked from URL and write it to FileName.

# rubygems is loaded first so that the gem-provided 'colread' library can be
# found on Ruby versions that do not load rubygems automatically.
require 'rubygems'
require 'optparse'
require 'colread'

# NOTE: these options are collected but not currently passed on to
# ColRead::Text.
options = { :debug => false, :images => false }
options_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"

  opts.on("-d", "--debug", "Show debug output") do |v|
    options[:debug] = v
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# An unknown or malformed flag should produce the usage text and a non-zero
# exit status rather than an uncaught exception and a backtrace.
begin
  options_parser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts options_parser
  exit 1
end

# Exactly two positional arguments are required: the URL and the output file.
if ARGV.length != 2
  STDERR.puts options_parser
  exit 1
end

ColRead::Text.new(url: ARGV.first, output: ARGV.last).start
|
data/lib/colread.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'readability'
|
4
|
+
|
5
|
+
module Nokogiri
  module XML
    class Element
      # Returns this element's +href+ attribute with every run of digits
      # replaced by "x", or nil when the element has no +href+.
      #
      # Links that differ only in their numbers (for example
      # "/book/12/3.html" and "/book/12/4.html") therefore produce the same
      # value.
      #
      # @return [String, nil]
      def indent
        href = self['href']
        # Explicit nil check instead of a blanket `rescue nil`, so that
        # unrelated errors are no longer silently swallowed.
        href && href.gsub(/\d+/, 'x')
      end
    end
  end
end
|
14
|
+
|
15
|
+
module ColRead
  # Scraping helpers shared by the ColRead front ends.
  #
  # The including class is expected to set @root (scheme and host of the
  # index page). When it also sets @url (the index page itself), relative
  # chapter links are resolved against that page.
  module Core
    # Matches strings that begin with a URL scheme such as "http://".
    URL_SCHEME = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # Picks the chapter links out of an index page.
    #
    # Anchors are grouped by the shape of their href (digits masked by
    # Element#indent) and the largest group is returned.
    #
    # @param url [String] address of the index page
    # @return [Array] the anchors of the largest group; empty when the page
    #   has no anchors with an href
    def chapters(url)
      doc = fetch(url) { |io| Nokogiri::HTML(io) }
      # Anchors without an href cannot be downloaded, so they must not be
      # able to win the vote below.
      links = doc.css('a').select { |a| a['href'] }
      return [] if links.empty?

      links.group_by { |a| a.indent }.max_by { |_shape, group| group.size }.last
    end

    # Downloads every chapter and yields its title and extracted text.
    #
    # @param chapters [Array] anchors as returned by #chapters
    # @yield [Array(String, String)] the link text and the readable text of
    #   the linked page
    def contents(chapters)
      chapters.each do |a|
        source = fetch(absolute_url(a['href'])) { |io| io.read.encode('utf-8') }
        content = Nokogiri::HTML(Readability::Document.new(source).content).text
        yield [a.text, content]
      end
    end

    private

    # Turns a chapter href into an absolute URL.
    #
    # Hrefs that already carry a scheme are returned untouched; everything
    # else is resolved against the index page (falling back to @root), so
    # that "2.html" becomes ".../book/2.html" rather than "http://host2.html".
    def absolute_url(href)
      return href if URL_SCHEME =~ href

      URI.join(@url || @root, href).to_s
    end

    # Opens +location+ for reading, yields the handle and closes it again.
    #
    # URLs go through open-uri's URI#open and anything else through
    # File.open. Unlike Kernel#open this never runs a "| command" string as
    # a subprocess.
    def fetch(location, &block)
      if URL_SCHEME =~ location
        uri = URI.parse(location)
        return uri.open(&block) if uri.respond_to?(:open)
      end
      File.open(location, &block)
    end
  end

  # Downloads every chapter linked from an index page into one text file.
  class Text
    include ColRead::Core

    # @param options [Hash] :url is the index page, :output is the path of
    #   the file to write
    def initialize(options={url: '',output: ''})
      @url = options[:url]
      @output = options[:output]
      # Everything before the first single slash, i.e. scheme plus host.
      @root = @url.split(/(?<!\/)\/(?!\/)/).first
    end

    # Fetches all chapters, echoing each title to stdout and writing title
    # and text to the output file.
    #
    # @return [nil]
    def start
      # The block form closes the file even when a download fails midway.
      File.open(@output, 'w') do |file|
        contents(chapters(@url)) do |title, body|
          puts title
          file.puts title
          file.puts body
        end
      end
      nil
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: colread
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yunjie chen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-03 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: download a list of text on a page
|
14
|
+
email: kikyous@163.com
|
15
|
+
executables:
|
16
|
+
- colread
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/colread.rb
|
21
|
+
- bin/colread
|
22
|
+
homepage: http://rubygems.org/gems/colread
|
23
|
+
licenses: []
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.0.3
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: colread!
|
45
|
+
test_files: []
|