ficrip 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 274aeb5446a0a3d556d227c02e66fd394c0f7c73
4
- data.tar.gz: 95c5d132493981015636a876efd8fc139a800e3a
3
+ metadata.gz: 1a777da71d255f1d0dd702848a84d4442f5e8f54
4
+ data.tar.gz: e7b03da00e5e7aa55fda7c3b2771bb60c4061eb7
5
5
  SHA512:
6
- metadata.gz: e9be42f1da6348b7d24c348aae7e82864c3b8f1300041201c516ca1d007f2db82441a45c15560ed61b39329b728764e44181b290127dc4516b429761c085de09
7
- data.tar.gz: 84b991e21709287b39ef532e611910615922c1709992914e6dd149c4dccfbdc224ba145a68cdfef16b384ec78d3f499c69dad41fb1590840caaacd7c06aedcfa
6
+ metadata.gz: d22c5f1d7d942af2c2f0eaacce64877b32b816e209215ac4818e26e730d809422419a60322fce4b8e7e5ef60f8b93e2cc5639d5fa496028b71421da72a772206
7
+ data.tar.gz: c0e711f7c43c3e826f16b274384efb5f09d41d27358447f9e5598899b769e4911a20d12950a2f83fcfe9f3c20faa1b64a2311b500366836871c091521ba28d51
data/bin/ficrip CHANGED
@@ -53,7 +53,8 @@ storyids.each_with_index do |storyid, idx|
53
53
  progressbar.total = fic.chapters.count
54
54
  progressbar.format = prefix + "#{author_title} | %a [%B] %p%% (%c/%C)"
55
55
 
56
- epub = fic.bind(callback: lambda { progressbar.increment })
56
+
57
+ epub = fic.bind(version: opts[:epub2] ? 2 : 3, callback: lambda { progressbar.increment })
57
58
 
58
59
  progressbar.format = prefix + "#{author_title}... Binding"
59
60
  filename = File.join(Dir.pwd, "#{author_title}.epub")
data/ficrip.gemspec CHANGED
@@ -24,8 +24,10 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'contracts', '~> 0.14'
25
25
  spec.add_dependency 'i18n_data', '~> 0.7'
26
26
  spec.add_dependency 'fastimage', '~> 1.8'
27
+ spec.add_dependency 'chronic_duration', '~> 0.10'
28
+ spec.add_dependency 'retryable', '~> 2.0'
27
29
 
28
- spec.add_development_dependency 'bundler', '~> 1.12'
29
- spec.add_development_dependency 'rake', '~> 11.2'
30
+ spec.add_development_dependency 'bundler'
31
+ spec.add_development_dependency 'rake'
30
32
  spec.add_development_dependency 'rspec', '~> 3.5'
31
33
  end
@@ -1,6 +1,7 @@
1
1
  class Array
2
2
  def find_with(str)
3
- find { |i| i.start_with? str }.gsub(str, '').strip
3
+ r = find { |i| i.start_with? str }
4
+ r.gsub(str, '').strip if r
4
5
  end
5
6
  end
6
7
 
@@ -14,3 +15,30 @@ class String
14
15
  gsub(/^[ \t]{#{indent}}/, '')
15
16
  end
16
17
  end
18
+
19
+ class Object
20
+ def as
21
+ yield self
22
+ end
23
+
24
+ # From http://stackoverflow.com/a/8206537
25
+ def deep_clone
26
+ return @deep_cloning_obj if @deep_cloning
27
+ @deep_cloning_obj = clone
28
+ @deep_cloning_obj.instance_variables.each do |var|
29
+ val = @deep_cloning_obj.instance_variable_get(var)
30
+ begin
31
+ @deep_cloning = true
32
+ val = val.deep_clone
33
+ rescue TypeError
34
+ next
35
+ ensure
36
+ @deep_cloning = false
37
+ end
38
+ @deep_cloning_obj.instance_variable_set(var, val)
39
+ end
40
+ deep_cloning_obj = @deep_cloning_obj
41
+ @deep_cloning_obj = nil
42
+ deep_cloning_obj
43
+ end
44
+ end
@@ -1,8 +1,12 @@
1
1
  require 'contracts'
2
- require 'gepub'
2
+ require 'open-uri'
3
+
4
+ # Helpers
3
5
  require 'i18n_data'
4
6
  require 'fastimage'
5
- require 'open-uri'
7
+ require 'chronic_duration'
8
+ require 'retryable'
9
+
6
10
  require_relative 'extensions'
7
11
 
8
12
  module Ficrip
@@ -12,7 +16,12 @@ module Ficrip
12
16
  Contract Integer => Story
13
17
  def self.fetch(storyid)
14
18
  base_url = "https://www.fanfiction.net/s/#{storyid}/"
15
- primary_page = Nokogiri::HTML open(base_url)
19
+
20
+ primary_page = Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
21
+ Nokogiri::HTML open(base_url)
22
+ end
23
+
24
+ raise(ArgumentError.new("Invalid StoryID #{storyid}")) if primary_page.css('#profile_top').count == 0
16
25
 
17
26
  title = primary_page.css('#profile_top > b').first.text
18
27
  author = primary_page.css('#profile_top > a').first.text
@@ -23,18 +32,33 @@ module Ficrip
23
32
 
24
33
  info = primary_page.css('#profile_top > span.xgray.xcontrast_txt').text.split(' - ')
25
34
 
26
- s.rating = info.find_with 'Rated: Fiction'
27
- s.language = info[1]
28
- s.genres = info[2].split('/')
29
- s.characters = info[3].strip
30
- s.chapter_count = info.find_with('Chapters:').parse_int
31
- s.word_count = info.find_with('Words:').parse_int
32
- s.review_count = info.find_with('Reviews:').parse_int
33
- s.favs_count = info.find_with('Favs:').parse_int
34
- s.follows_count = info.find_with('Follows:').parse_int
35
- s.updated_date = Date.parse info.find_with('Updated:')
36
- s.published_date = Date.parse info.find_with('Published:')
37
- s.info_id = info.find_with('id:').to_i
35
+ s.rating = info.find_with 'Rated: Fiction'
36
+ s.language = info[1]
37
+ s.genres = info[2].split('/')
38
+ s.characters = info[3].strip
39
+ s.chapter_count = info.find_with('Chapters:').as { |c| c.parse_int unless c.nil? }
40
+ s.word_count = info.find_with('Words:').parse_int
41
+ s.review_count = info.find_with('Reviews:').parse_int
42
+ s.favs_count = info.find_with('Favs:').parse_int
43
+ s.follows_count = info.find_with('Follows:').parse_int
44
+
45
+ s.updated_date = info.find_with('Updated:').as do |d|
46
+ begin
47
+ Date.strptime(d, '%m/%d/%Y')
48
+ rescue
49
+ Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
50
+ end if d
51
+ end
52
+
53
+ s.published_date = info.find_with('Published:').as do |d|
54
+ begin
55
+ Date.strptime(d, '%m/%d/%Y')
56
+ rescue
57
+ Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
58
+ end
59
+ end
60
+
61
+ s.info_id = info.find_with('id:').to_i
38
62
 
39
63
  raise(Exception.new("Error! StoryID and parsed ID don't match.")) if s.info_id != storyid
40
64
 
@@ -46,14 +70,14 @@ module Ficrip
46
70
  if chapter_select
47
71
  s.chapters = chapter_select.children.map(&:text)
48
72
  else
49
- s.chapters = "1. #{title}"
73
+ s.chapters = ["1. #{title}"]
50
74
  end
51
75
  end
52
76
  end
53
77
 
54
78
  Contract Integer => GEPUB::Book
55
79
  def self.get(storyid, version: 3)
56
- fetch(storyid).bind(version)
80
+ fetch(storyid).bind(version: version)
57
81
  end
58
82
 
59
83
  end
data/lib/ficrip/story.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require 'contracts'
2
2
  require 'open-uri'
3
+ require 'retryable'
4
+ require 'gepub'
5
+
3
6
  require_relative 'extensions'
4
7
 
5
8
  module Ficrip
@@ -7,7 +10,7 @@ module Ficrip
7
10
  include Contracts::Core
8
11
  include Contracts::Builtin
9
12
 
10
- attr_accessor :title, :author, :metadata
13
+ attr_accessor :title, :author, :url, :metadata
11
14
 
12
15
  DOCTYPE = {
13
16
  3 => '<!DOCTYPE html>',
@@ -21,11 +24,6 @@ module Ficrip
21
24
  @metadata = metadata
22
25
  end
23
26
 
24
- def construct
25
- yield self
26
- self
27
- end
28
-
29
27
  def self.construct(title, author, url)
30
28
  s = Story::new(title, author, url)
31
29
  yield s
@@ -43,7 +41,14 @@ module Ficrip
43
41
  end
44
42
  end
45
43
 
44
+ def respond_to_missing?(method_sym, include_private = false)
45
+ @metadata.key?(method_sym) ||
46
+ (method_sym != :title= && method_sym != :author= && method_sym.to_s.end_with?('=')) ||
47
+ super
48
+ end
49
+
46
50
  Contract Symbol, String => Story
51
+
47
52
  def add_metadata(key, value)
48
53
  @metadata[key] = value
49
54
  self
@@ -51,8 +56,6 @@ module Ficrip
51
56
 
52
57
  # Contract { version: Maybe[Or[2, 3]] }
53
58
  def bind(version: 3, callback: nil)
54
-
55
-
56
59
  book = GEPUB::Book.new('OEPBS/package.opf', 'version' => version.to_f.to_s)
57
60
  book.primary_identifier(@url, 'BookId', 'URL')
58
61
 
@@ -62,7 +65,7 @@ module Ficrip
62
65
 
63
66
  # Cover if it exists
64
67
  if @metadata.key? :cover_url
65
- cover = open(@metadata[:cover_url], 'Referer' => @url)
68
+ cover = open!(@metadata[:cover_url], 'Referer' => @url)
66
69
  cover_type = FastImage.type(cover)
67
70
  book.add_item(format('img/cover_image.%s', cover_type), cover)
68
71
  .cover_image
@@ -91,18 +94,49 @@ module Ficrip
91
94
  XHTML
92
95
  end
93
96
 
97
+ titlepage = <<-XHTML.strip_heredoc
98
+ <?xml version="1.0" encoding="utf-8"?>
99
+ #{DOCTYPE[version]}
100
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
101
+ <head>
102
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
103
+ <title>#{@title}</title>
104
+ <style type="text/css" title="override_css">
105
+ .outer { display: table; height: 75%; width: 100%; }
106
+ .middle { display: table-cell; vertical-align: middle; }
107
+ .inner { text-align: center; }
108
+ </style>
109
+ </head>
110
+ <body>
111
+ <div class="outer"><div class="middle"><div class="inner">
112
+ <h1>#{@title}</h1>
113
+ <h3>#{@author}</h3>
114
+ </div></div></div>
115
+ </body>
116
+ </html>
117
+ XHTML
118
+
119
+
120
+ table_of_contents = nil
94
121
  book.ordered do
95
122
  book.add_item('img/coverpage.xhtml')
96
123
  .add_content(StringIO.new(coverpage))
97
124
  .toc_text(@title) if @metadata.key? :cover_url
98
125
 
99
- unless @metadata[:chapters]
100
- @metadata[:chapters] = @url
101
- end
126
+ book.add_item('text/titlepage.xhtml')
127
+ .add_content(StringIO.new(Nokogiri::XML(titlepage) { |c| c.noblanks }.to_xhtml(indent: 2)))
128
+
129
+ book.add_item('text/infopage.xhtml')
130
+ .add_content(StringIO.new(Nokogiri::XML(render_metadata) { |c| c.noblanks }.to_xhtml(indent: 2)))
131
+ .toc_text('About')
132
+
133
+ # We want our TOC to be after the cover and titlepage, but we don't have any content
134
+ # for it yet, so we save it for later.
135
+ table_of_contents = book.add_item('text/toc.xhtml').toc_text('Table of Contents')
102
136
 
103
137
  chapters.each do |chapter|
104
138
  chapter_num, chapter_title = chapter.match(/^(\d+)\s*[-\\.)]?\s+(.*)/).captures
105
- chapter_page = Nokogiri::HTML open(URI.join(@url, chapter_num))
139
+ chapter_page = Nokogiri::HTML open!(URI.join(@url, chapter_num))
106
140
 
107
141
  storytext = chapter_page.css('#storytext').first
108
142
  storytext.xpath('//@noshade').remove
@@ -120,20 +154,104 @@ module Ficrip
120
154
  #{'<section epub:type="chapter">' if version == 3}
121
155
  <h1 style="text-align:center">#{chapter_title}</h1>
122
156
  #{storytext.children.to_xhtml}
123
- #{'</section>' if version == 3}
157
+ #{'</section>' if version == 3}
124
158
  </body>
125
159
  </html>
126
160
  XHTML
127
161
 
128
- book.add_item(format('text/chapter%03d.xhtml', chapter_num),
129
- nil, "c#{chapter_num}")
130
- .add_content(StringIO.new(Nokogiri::XML(chapter_xhtml){|c| c.noblanks}.to_xhtml(indent:2)))
162
+ book.add_item(format('text/chapter%03d.xhtml', chapter_num), nil, "c#{chapter_num}")
163
+ .add_content(StringIO.new(Nokogiri::XML(chapter_xhtml) { |c| c.noblanks }.to_xhtml(indent: 2)))
131
164
  .toc_text(chapter_title)
132
165
 
133
- callback.call if callback
166
+ if callback
167
+ args = [chapter_num.to_i, chapters.count]
168
+ n = callback.arity
169
+ callback.call *(n < 0 ? args : args.take(n))
170
+ end
134
171
  end
135
172
  end
173
+
174
+ # This generates a proper Table of Contents page at the start of the book by
175
+ # removing references to the cover and TOC itself
176
+ book_copy = book.deep_clone
177
+ cut_idx = @metadata.key?(:cover_url) ? 3 : 2
178
+ book_copy.instance_variable_set(:@toc, book_copy.instance_variable_get(:@toc)[cut_idx..-1])
179
+ table_of_contents.add_content(StringIO.new(book_copy.nav_doc)) # Finally, we get to add the actual content
180
+
181
+ # Now that we've generated the TOC page, go through every chapter reference in the
182
+ # toc and prepend the chapter number. This is for the epub's built-in table of contents
183
+ book.instance_variable_get(:@toc)[cut_idx..-1].each_with_index do |chap, idx|
184
+ chap[:text] = "#{idx + 1}. #{chap[:text]}"
185
+ end
186
+
187
+ add_item('nav.html', StringIO.new(book_copy.nav_doc), 'nav').add_property('nav') if version == 3
188
+
136
189
  book
137
190
  end
191
+
192
+ def render_metadata
193
+ data = {
194
+ 'Rating' => rating,
195
+ 'Language' => language,
196
+ 'Genres' => genres.join(', '),
197
+ 'Characters/Pairings' => characters,
198
+ 'Chapter count' => format_num(chapter_count),
199
+ 'Word count' => format_num(word_count),
200
+ 'Reviews' => "<a href='https://fanfiction.com/r/#{info_id}'>" + format_num(review_count) + '</a>',
201
+ 'Favorites' => format_num(favs_count),
202
+ 'Follows' => format_num(follows_count),
203
+ 'Updated' => updated_date,
204
+ 'Published' => published_date,
205
+ 'ID' => info_id
206
+ }
207
+
208
+ Nokogiri::XML::Builder.new(encoding: 'utf-8') { |doc|
209
+ doc.html('xmlns' => 'http://www.w3.org/1999/xhtml', 'xml:lang' => 'en') {
210
+ doc.head {
211
+ doc.meta 'http-equiv' => 'Content-Type', 'content' => 'text/html; charset=UTF-8'
212
+ doc.title 'About'
213
+ }
214
+ doc.body {
215
+ doc.p { doc.strong 'Author: '; doc.a(href: author_url) { doc.text @author } }
216
+ doc.p { doc.strong 'Summary:'; doc.br; doc.text summary }
217
+ data.each do |k, v|
218
+ doc.span {
219
+ doc.strong(k + ':')
220
+ (v.to_s.start_with? '<a') ? doc << v.to_s : doc.text(' ' + v.to_s)
221
+ doc.br
222
+ }
223
+ end
224
+ }
225
+ }
226
+ }.to_xml
227
+ end
228
+
229
+ private
230
+ def open!(*args, &block)
231
+ Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
232
+ open(*args, &block)
233
+ end
234
+ end
235
+
236
+ def handle_url(string)
237
+ if string.start_with? 'https://www.fanfiction.net/u/'
238
+ "<a href='#{string}'>"
239
+ end
240
+ end
241
+
242
+ # # File activesupport/lib/active_support/inflector/methods.rb, line 123
243
+ # def humanize(word, capitalize: false)
244
+ # result = word.to_s.dup
245
+ # result.sub!(/\A_+/, ''.freeze)
246
+ # result.sub!(/_id\z/, ''.freeze)
247
+ # result.tr!('_'.freeze, ' '.freeze)
248
+ # result.gsub!(/([a-z\d]*)/i) { |match| match.downcase }
249
+ # result.sub!(/\A\w/) { |match| match.upcase } if capitalize
250
+ # result
251
+ # end
252
+
253
+ def format_num(num)
254
+ num.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse
255
+ end
138
256
  end
139
- end
257
+ end
@@ -1,3 +1,3 @@
1
1
  module Ficrip
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ficrip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Katherine Whitlock
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-21 00:00:00.000000000 Z
11
+ date: 2016-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -109,33 +109,61 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.8'
111
111
  - !ruby/object:Gem::Dependency
112
- name: bundler
112
+ name: chronic_duration
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '1.12'
118
- type: :development
117
+ version: '0.10'
118
+ type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '1.12'
124
+ version: '0.10'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rake
126
+ name: retryable
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '11.2'
132
- type: :development
131
+ version: '2.0'
132
+ type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '11.2'
138
+ version: '2.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: bundler
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rake
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
139
167
  - !ruby/object:Gem::Dependency
140
168
  name: rspec
141
169
  requirement: !ruby/object:Gem::Requirement