epub_book 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/epub_book/book.rb +20 -9
- data/lib/epub_book/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3fb57f569d6a4f72770cd99441352ec380dcdb20
|
4
|
+
data.tar.gz: 8bc1961447a9490bc36ee3e055f24da6d9a8a24a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0794e58ce0add4601e4bb639177a215e88c99622988f538739fe46df3fd1e0d6c989034d27350ecc5403ffbe09f5a364592dcf03504f42327261f83fe830c948
|
7
|
+
data.tar.gz: d5d3c1fcbfc6831c3d007a47b9b5c9a68c6c00cfca3c9996002ed451a7775c8733f343752553a0c4dc8a18f199dbaef110ce453b0210352ee56ca57a9965274c
|
data/lib/epub_book/book.rb
CHANGED
@@ -32,7 +32,7 @@ module EpubBook
|
|
32
32
|
@des_url = des_url
|
33
33
|
@user_agent = UserAgent
|
34
34
|
@referer = Referer
|
35
|
-
@folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-
|
35
|
+
@folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-3]
|
36
36
|
@creator = 'javy_liu'
|
37
37
|
@title_css = '.wrapper h1.title1'
|
38
38
|
@index_item_css = 'ul.list3>li>a'
|
@@ -110,16 +110,14 @@ module EpubBook
|
|
110
110
|
#得到书目索引
|
111
111
|
def fetch_index(url=nil)
|
112
112
|
url ||= @index_url
|
113
|
-
|
114
|
-
doc = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
113
|
+
doc = Nokogiri::HTML(judge_encoding(open(URI.encode(url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
|
115
114
|
#generate index.yml
|
116
115
|
|
117
116
|
if !book[:title]
|
118
117
|
doc1 = if @des_url.nil?
|
119
118
|
doc
|
120
119
|
else
|
121
|
-
|
122
|
-
Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
120
|
+
Nokogiri::HTML(judge_encoding(open(URI.encode(@des_url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
|
123
121
|
end
|
124
122
|
get_des(doc1)
|
125
123
|
end
|
@@ -127,8 +125,17 @@ module EpubBook
|
|
127
125
|
|
128
126
|
doc.css(@index_item_css).each do |item|
|
129
127
|
_href = URI.encode(item.attr(@item_attr).to_s)
|
130
|
-
next if _href.start_with?('javascript')
|
131
|
-
|
128
|
+
next if _href.start_with?('javascript') || _href.start_with?('#')
|
129
|
+
|
130
|
+
if _href.start_with?("http")
|
131
|
+
_href
|
132
|
+
elsif _href.start_with?("/")
|
133
|
+
_href = "#{link_host}#{_href}"
|
134
|
+
else
|
135
|
+
@path_name ||= File.dirname(@index_url)
|
136
|
+
_href = "#{@path_name}/#{_href}"
|
137
|
+
end
|
138
|
+
|
132
139
|
book[:files] << {label: item.text, url: _href}
|
133
140
|
end
|
134
141
|
|
@@ -159,8 +166,7 @@ module EpubBook
|
|
159
166
|
next if test(?s,content_path)
|
160
167
|
|
161
168
|
begin
|
162
|
-
|
163
|
-
doc_file = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
169
|
+
doc_file = Nokogiri::HTML(judge_encoding(open(item[:url],"User-Agent" => @user_agent,'Referer'=> @referer).read))
|
164
170
|
|
165
171
|
File.open(content_path,'w') do |f|
|
166
172
|
f.write("<h3>#{item[:label]}</h3>")
|
@@ -180,6 +186,11 @@ module EpubBook
|
|
180
186
|
|
181
187
|
|
182
188
|
private
|
189
|
+
#is valid encoding
|
190
|
+
def judge_encoding(str)
|
191
|
+
/<meta.*?charset\s*=[\s\"\']?utf-8/i =~ str ? str : str.force_encoding('gbk').encode('utf-8')
|
192
|
+
end
|
193
|
+
|
183
194
|
#得到书名,介绍,及封面
|
184
195
|
def get_des(doc)
|
185
196
|
book[:title] = doc.css(@title_css).text.strip
|
data/lib/epub_book/version.rb
CHANGED