colread 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/colread +28 -0
- data/lib/colread.rb +49 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 20aa61c77b99f49fd87290fe5061307ebd517d83
|
4
|
+
data.tar.gz: 103f1857d93809b17a6f2d850033555c3061bc41
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ea3687991d2e945d896e8069ad7a85df8e7e0f68bd50662139f7ea073bcfc7c28387b88c3121e7a98f54a9b857d35b2653af6690d1947aa7b305ae37fd8721b4
|
7
|
+
data.tar.gz: b070e16493c92b5b90f24c4013561fe85aaa442c7974e883ae3bd29ed13facf835f00a5b5cb9d83531803c627466faddcb5bc6b495c416b7e84d02611bed0095
|
data/bin/colread
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line entry point for the colread gem.
#
# Usage: colread [options] URL FileName
#
# URL is the index page to scan for chapter links and FileName is the text
# file the downloaded chapters are written to.

# RubyGems has to be loaded before the gem's own library so that
# `require 'colread'` can be resolved on interpreters that do not preload it.
require 'rubygems'
require 'optparse'
require 'colread'
#require File.dirname(__FILE__) + '/../lib/readability'

# NOTE: both flags are only recorded here; nothing below reads them yet.
options = { :debug => false, :images => false }
options_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($0)} [options] URL FileName"

  opts.on("-d", "--debug", "Show debug output") do |v|
    options[:debug] = v
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# Report unknown or malformed switches with the usage text instead of
# letting the parser's exception surface as a backtrace.
begin
  options_parser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts options_parser
  exit 1
end

# Exactly two positional arguments are required: the URL and the output file.
unless ARGV.length == 2
  STDERR.puts options_parser
  exit 1
end

url, output = ARGV
ColRead::Text.new(url: url, output: output).start
|
data/lib/colread.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'readability'
|
4
|
+
|
5
|
+
module Nokogiri
  module XML
    class Element
      # Builds a grouping key from this element's +href+ attribute by
      # replacing every run of digits with the letter +x+, so that links
      # which differ only in their numbers produce the same key
      # (e.g. "/book/12/3.html" and "/book/12/4.html" both give
      # "/book/x/x.html").
      #
      # @return [String, nil] the normalized href, or nil when the element
      #   has no +href+ attribute
      def indent
        href = self['href']
        # Explicit nil guard instead of a blanket `rescue nil`, so genuine
        # errors are no longer hidden; parentheses avoid Ruby's
        # "ambiguous first argument" warning for the regexp literal.
        href && href.gsub(/\d+/, 'x')
      end
    end
  end
end
|
14
|
+
|
15
|
+
module ColRead
  # Scraping helpers shared by the ColRead front ends.
  #
  # The including class supplies the instance variables read here:
  # @url (the index page) and @root (the part of @url before its first
  # single slash, e.g. "http://example.com").
  module Core
    # Matches strings that start with a URL scheme ("http://", "ftp://", ...).
    REMOTE_URL = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # Finds the chapter links of an index page.
    #
    # All anchors that carry an +href+ are grouped by
    # Nokogiri::XML::Element#indent and the largest group is returned.
    #
    # @param url [String] location of the index page
    # @return [Array<Nokogiri::XML::Element>] anchors of the largest group,
    #   or an empty array when the page has no anchors with an +href+
    def chapters(url)
      doc = open_location(url) { |io| Nokogiri::HTML(io) }
      # Anchors without an href cannot be followed, so they must not be
      # allowed to win the "largest group" vote.
      links = doc.css('a').select { |a| a['href'] }
      return [] if links.empty?

      links.group_by { |a| a.indent }.max_by { |_, group| group.count }.last
    end

    # Downloads every chapter and yields its title and readable text.
    #
    # @param chapters [Enumerable] anchors responding to +['href']+ and +text+
    # @yield [Array(String, String)] the link text and the extracted content
    # @return the value of +chapters.each+
    def contents(chapters)
      chapters.each do |a|
        href = absolute_url(a['href'])
        source = open_location(href) { |io| io.read }.encode('utf-8')
        content = Nokogiri::HTML(Readability::Document.new(source).content).text
        yield [a.text, content]
      end
    end

    private

    # Opens +location+ for reading and yields the IO, closing it afterwards.
    # Strings with a URL scheme are fetched through open-uri; anything else
    # is treated as a local file. Unlike Kernel#open this never spawns a
    # "|command" pipe and it also works on Ruby versions where Kernel#open
    # no longer accepts URLs.
    def open_location(location, &block)
      if REMOTE_URL =~ location && (uri = URI.parse(location)).respond_to?(:open)
        uri.open(&block)
      else
        File.open(location, &block)
      end
    end

    # Turns a chapter href into a fetchable URL. Links starting with "http"
    # are returned untouched; everything else is resolved against the index
    # page so that both "/abs/path" and "relative.html" work.
    def absolute_url(href)
      return href if href.start_with?('http')

      URI.join(@url || @root, href).to_s
    rescue URI::Error
      # Base or href is not a valid URI: keep the historical behavior of
      # gluing the href onto the root.
      "#{@root}#{href}"
    end
  end

  # Downloads all chapters linked from an index page into one text file.
  class Text
    include ColRead::Core

    # @param options [Hash] +:url+ is the index page, +:output+ the path of
    #   the file to write
    def initialize(options = { url: '', output: '' })
      # to_s keeps a missing :url from blowing up in the split below.
      @url = options[:url].to_s
      @output = options[:output]
      # Everything before the first slash that is not part of "//".
      @root = @url.split(%r{(?<!/)/(?!/)}).first
    end

    # Fetches every chapter, echoes each title to standard output and
    # writes title plus content to the output file.
    #
    # @return [nil]
    def start
      # Block form closes the file even when a download raises.
      File.open(@output, 'w') do |f|
        contents(chapters(@url)) do |title, body|
          puts title
          f.puts title
          f.puts body
        end
      end
      nil
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: colread
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yunjie chen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-03 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: download a list of text on a page
|
14
|
+
email: kikyous@163.com
|
15
|
+
executables:
|
16
|
+
- colread
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/colread.rb
|
21
|
+
- bin/colread
|
22
|
+
homepage: http://rubygems.org/gems/colread
|
23
|
+
licenses: []
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.0.3
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: colread!
|
45
|
+
test_files: []
|