epub_book 0.1.7 → 0.1.8
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/epub_book/book.rb +20 -9
- data/lib/epub_book/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3fb57f569d6a4f72770cd99441352ec380dcdb20
|
|
4
|
+
data.tar.gz: 8bc1961447a9490bc36ee3e055f24da6d9a8a24a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0794e58ce0add4601e4bb639177a215e88c99622988f538739fe46df3fd1e0d6c989034d27350ecc5403ffbe09f5a364592dcf03504f42327261f83fe830c948
|
|
7
|
+
data.tar.gz: d5d3c1fcbfc6831c3d007a47b9b5c9a68c6c00cfca3c9996002ed451a7775c8733f343752553a0c4dc8a18f199dbaef110ce453b0210352ee56ca57a9965274c
|
data/lib/epub_book/book.rb
CHANGED
|
@@ -32,7 +32,7 @@ module EpubBook
|
|
|
32
32
|
@des_url = des_url
|
|
33
33
|
@user_agent = UserAgent
|
|
34
34
|
@referer = Referer
|
|
35
|
-
@folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-
|
|
35
|
+
@folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-3]
|
|
36
36
|
@creator = 'javy_liu'
|
|
37
37
|
@title_css = '.wrapper h1.title1'
|
|
38
38
|
@index_item_css = 'ul.list3>li>a'
|
|
@@ -110,16 +110,14 @@ module EpubBook
|
|
|
110
110
|
#得到书目索引
|
|
111
111
|
def fetch_index(url=nil)
|
|
112
112
|
url ||= @index_url
|
|
113
|
-
|
|
114
|
-
doc = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
|
113
|
+
doc = Nokogiri::HTML(judge_encoding(open(URI.encode(url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
|
|
115
114
|
#generate index.yml
|
|
116
115
|
|
|
117
116
|
if !book[:title]
|
|
118
117
|
doc1 = if @des_url.nil?
|
|
119
118
|
doc
|
|
120
119
|
else
|
|
121
|
-
|
|
122
|
-
Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
|
120
|
+
Nokogiri::HTML(judge_encoding(open(URI.encode(@des_url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
|
|
123
121
|
end
|
|
124
122
|
get_des(doc1)
|
|
125
123
|
end
|
|
@@ -127,8 +125,17 @@ module EpubBook
|
|
|
127
125
|
|
|
128
126
|
doc.css(@index_item_css).each do |item|
|
|
129
127
|
_href = URI.encode(item.attr(@item_attr).to_s)
|
|
130
|
-
next if _href.start_with?('javascript')
|
|
131
|
-
|
|
128
|
+
next if _href.start_with?('javascript') || _href.start_with?('#')
|
|
129
|
+
|
|
130
|
+
if _href.start_with?("http")
|
|
131
|
+
_href
|
|
132
|
+
elsif _href.start_with?("/")
|
|
133
|
+
_href = "#{link_host}#{_href}"
|
|
134
|
+
else
|
|
135
|
+
@path_name ||= File.dirname(@index_url)
|
|
136
|
+
_href = "#{@path_name}/#{_href}"
|
|
137
|
+
end
|
|
138
|
+
|
|
132
139
|
book[:files] << {label: item.text, url: _href}
|
|
133
140
|
end
|
|
134
141
|
|
|
@@ -159,8 +166,7 @@ module EpubBook
|
|
|
159
166
|
next if test(?s,content_path)
|
|
160
167
|
|
|
161
168
|
begin
|
|
162
|
-
|
|
163
|
-
doc_file = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
|
|
169
|
+
doc_file = Nokogiri::HTML(judge_encoding(open(item[:url],"User-Agent" => @user_agent,'Referer'=> @referer).read))
|
|
164
170
|
|
|
165
171
|
File.open(content_path,'w') do |f|
|
|
166
172
|
f.write("<h3>#{item[:label]}</h3>")
|
|
@@ -180,6 +186,11 @@ module EpubBook
|
|
|
180
186
|
|
|
181
187
|
|
|
182
188
|
private
|
|
189
|
+
#is valid encoding
|
|
190
|
+
def judge_encoding(str)
|
|
191
|
+
/<meta.*?charset\s*=[\s\"\']?utf-8/i =~ str ? str : str.force_encoding('gbk').encode('utf-8')
|
|
192
|
+
end
|
|
193
|
+
|
|
183
194
|
#得到书名,介绍,及封面
|
|
184
195
|
def get_des(doc)
|
|
185
196
|
book[:title] = doc.css(@title_css).text.strip
|
data/lib/epub_book/version.rb
CHANGED