epub_book 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0735c9b7e26babca4b2f7fa802ecdfe0b7d4a28a
4
- data.tar.gz: 10a55f1dd0faf6eb2c40496170f615eb4ab85604
3
+ metadata.gz: 3fb57f569d6a4f72770cd99441352ec380dcdb20
4
+ data.tar.gz: 8bc1961447a9490bc36ee3e055f24da6d9a8a24a
5
5
  SHA512:
6
- metadata.gz: 9c460cc949d519dfd0cf44e366ad724710cc0da94e84dcff8350d6b72c9a5bd3659a17f72891e4fd605cabfce44d7a75758169889076d7881758bb6bd68feabe
7
- data.tar.gz: ebf8039702739dd2d45ec763179d856c982ce67f89efa4cbe24ab68d3d9c2d82132d884c24ecfec5340e5bef5530a92e04fc69bf1bf3027352d26fd43b5a870f
6
+ metadata.gz: 0794e58ce0add4601e4bb639177a215e88c99622988f538739fe46df3fd1e0d6c989034d27350ecc5403ffbe09f5a364592dcf03504f42327261f83fe830c948
7
+ data.tar.gz: d5d3c1fcbfc6831c3d007a47b9b5c9a68c6c00cfca3c9996002ed451a7775c8733f343752553a0c4dc8a18f199dbaef110ce453b0210352ee56ca57a9965274c
@@ -32,7 +32,7 @@ module EpubBook
32
32
  @des_url = des_url
33
33
  @user_agent = UserAgent
34
34
  @referer = Referer
35
- @folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-2]
35
+ @folder_name = Base64.urlsafe_encode64(Digest::MD5.digest(@index_url))[0..-3]
36
36
  @creator = 'javy_liu'
37
37
  @title_css = '.wrapper h1.title1'
38
38
  @index_item_css = 'ul.list3>li>a'
@@ -110,16 +110,14 @@ module EpubBook
110
110
  #得到书目索引
111
111
  def fetch_index(url=nil)
112
112
  url ||= @index_url
113
- _tmp_string = open(URI.encode(url),"User-Agent" => @user_agent ,'Referer'=> @referer).read
114
- doc = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
113
+ doc = Nokogiri::HTML(judge_encoding(open(URI.encode(url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
115
114
  #generate index.yml
116
115
 
117
116
  if !book[:title]
118
117
  doc1 = if @des_url.nil?
119
118
  doc
120
119
  else
121
- _tmp_string = open(URI.encode(@des_url),"User-Agent" => @user_agent ,'Referer'=> @referer).read
122
- Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
120
+ Nokogiri::HTML(judge_encoding(open(URI.encode(@des_url),"User-Agent" => @user_agent ,'Referer'=> @referer).read))
123
121
  end
124
122
  get_des(doc1)
125
123
  end
@@ -127,8 +125,17 @@ module EpubBook
127
125
 
128
126
  doc.css(@index_item_css).each do |item|
129
127
  _href = URI.encode(item.attr(@item_attr).to_s)
130
- next if _href.start_with?('javascript')
131
- _href = link_host + _href unless _href.start_with?("http")
128
+ next if _href.start_with?('javascript') || _href.start_with?('#')
129
+
130
+ if _href.start_with?("http")
131
+ _href
132
+ elsif _href.start_with?("/")
133
+ _href = "#{link_host}#{_href}"
134
+ else
135
+ @path_name ||= File.dirname(@index_url)
136
+ _href = "#{@path_name}/#{_href}"
137
+ end
138
+
132
139
  book[:files] << {label: item.text, url: _href}
133
140
  end
134
141
 
@@ -159,8 +166,7 @@ module EpubBook
159
166
  next if test(?s,content_path)
160
167
 
161
168
  begin
162
- _tmp_string = open(item[:url],"User-Agent" => @user_agent,'Referer'=> @referer).read
163
- doc_file = Nokogiri::HTML(Encoding::UTF_8 === _tmp_string.encoding ? _tmp_string : _tmp_string.force_encoding('gbk').encode('utf-8'))
169
+ doc_file = Nokogiri::HTML(judge_encoding(open(item[:url],"User-Agent" => @user_agent,'Referer'=> @referer).read))
164
170
 
165
171
  File.open(content_path,'w') do |f|
166
172
  f.write("<h3>#{item[:label]}</h3>")
@@ -180,6 +186,11 @@ module EpubBook
180
186
 
181
187
 
182
188
  private
189
+ #is valid encoding
190
+ def judge_encoding(str)
191
+ /<meta.*?charset\s*=[\s\"\']?utf-8/i =~ str ? str : str.force_encoding('gbk').encode('utf-8')
192
+ end
193
+
183
194
  #得到书名,介绍,及封面
184
195
  def get_des(doc)
185
196
  book[:title] = doc.css(@title_css).text.strip
@@ -1,3 +1,3 @@
1
1
  module EpubBook
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub_book
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - qmliu