ficrip 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 274aeb5446a0a3d556d227c02e66fd394c0f7c73
4
- data.tar.gz: 95c5d132493981015636a876efd8fc139a800e3a
3
+ metadata.gz: 1a777da71d255f1d0dd702848a84d4442f5e8f54
4
+ data.tar.gz: e7b03da00e5e7aa55fda7c3b2771bb60c4061eb7
5
5
  SHA512:
6
- metadata.gz: e9be42f1da6348b7d24c348aae7e82864c3b8f1300041201c516ca1d007f2db82441a45c15560ed61b39329b728764e44181b290127dc4516b429761c085de09
7
- data.tar.gz: 84b991e21709287b39ef532e611910615922c1709992914e6dd149c4dccfbdc224ba145a68cdfef16b384ec78d3f499c69dad41fb1590840caaacd7c06aedcfa
6
+ metadata.gz: d22c5f1d7d942af2c2f0eaacce64877b32b816e209215ac4818e26e730d809422419a60322fce4b8e7e5ef60f8b93e2cc5639d5fa496028b71421da72a772206
7
+ data.tar.gz: c0e711f7c43c3e826f16b274384efb5f09d41d27358447f9e5598899b769e4911a20d12950a2f83fcfe9f3c20faa1b64a2311b500366836871c091521ba28d51
data/bin/ficrip CHANGED
@@ -53,7 +53,8 @@ storyids.each_with_index do |storyid, idx|
53
53
  progressbar.total = fic.chapters.count
54
54
  progressbar.format = prefix + "#{author_title} | %a [%B] %p%% (%c/%C)"
55
55
 
56
- epub = fic.bind(callback: lambda { progressbar.increment })
56
+
57
+ epub = fic.bind(version: opts[:epub2] ? 2 : 3, callback: lambda { progressbar.increment })
57
58
 
58
59
  progressbar.format = prefix + "#{author_title}... Binding"
59
60
  filename = File.join(Dir.pwd, "#{author_title}.epub")
data/ficrip.gemspec CHANGED
@@ -24,8 +24,10 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'contracts', '~> 0.14'
25
25
  spec.add_dependency 'i18n_data', '~> 0.7'
26
26
  spec.add_dependency 'fastimage', '~> 1.8'
27
+ spec.add_dependency 'chronic_duration', '~> 0.10'
28
+ spec.add_dependency 'retryable', '~> 2.0'
27
29
 
28
- spec.add_development_dependency 'bundler', '~> 1.12'
29
- spec.add_development_dependency 'rake', '~> 11.2'
30
+ spec.add_development_dependency 'bundler'
31
+ spec.add_development_dependency 'rake'
30
32
  spec.add_development_dependency 'rspec', '~> 3.5'
31
33
  end
@@ -1,6 +1,7 @@
1
1
  class Array
2
2
  def find_with(str)
3
- find { |i| i.start_with? str }.gsub(str, '').strip
3
+ r = find { |i| i.start_with? str }
4
+ r.gsub(str, '').strip if r
4
5
  end
5
6
  end
6
7
 
@@ -14,3 +15,30 @@ class String
14
15
  gsub(/^[ \t]{#{indent}}/, '')
15
16
  end
16
17
  end
18
+
19
+ class Object
20
+ def as
21
+ yield self
22
+ end
23
+
24
+ # From http://stackoverflow.com/a/8206537
25
+ def deep_clone
26
+ return @deep_cloning_obj if @deep_cloning
27
+ @deep_cloning_obj = clone
28
+ @deep_cloning_obj.instance_variables.each do |var|
29
+ val = @deep_cloning_obj.instance_variable_get(var)
30
+ begin
31
+ @deep_cloning = true
32
+ val = val.deep_clone
33
+ rescue TypeError
34
+ next
35
+ ensure
36
+ @deep_cloning = false
37
+ end
38
+ @deep_cloning_obj.instance_variable_set(var, val)
39
+ end
40
+ deep_cloning_obj = @deep_cloning_obj
41
+ @deep_cloning_obj = nil
42
+ deep_cloning_obj
43
+ end
44
+ end
@@ -1,8 +1,12 @@
1
1
  require 'contracts'
2
- require 'gepub'
2
+ require 'open-uri'
3
+
4
+ # Helpers
3
5
  require 'i18n_data'
4
6
  require 'fastimage'
5
- require 'open-uri'
7
+ require 'chronic_duration'
8
+ require 'retryable'
9
+
6
10
  require_relative 'extensions'
7
11
 
8
12
  module Ficrip
@@ -12,7 +16,12 @@ module Ficrip
12
16
  Contract Integer => Story
13
17
  def self.fetch(storyid)
14
18
  base_url = "https://www.fanfiction.net/s/#{storyid}/"
15
- primary_page = Nokogiri::HTML open(base_url)
19
+
20
+ primary_page = Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
21
+ Nokogiri::HTML open(base_url)
22
+ end
23
+
24
+ raise(ArgumentError.new("Invalid StoryID #{storyid}")) if primary_page.css('#profile_top').count == 0
16
25
 
17
26
  title = primary_page.css('#profile_top > b').first.text
18
27
  author = primary_page.css('#profile_top > a').first.text
@@ -23,18 +32,33 @@ module Ficrip
23
32
 
24
33
  info = primary_page.css('#profile_top > span.xgray.xcontrast_txt').text.split(' - ')
25
34
 
26
- s.rating = info.find_with 'Rated: Fiction'
27
- s.language = info[1]
28
- s.genres = info[2].split('/')
29
- s.characters = info[3].strip
30
- s.chapter_count = info.find_with('Chapters:').parse_int
31
- s.word_count = info.find_with('Words:').parse_int
32
- s.review_count = info.find_with('Reviews:').parse_int
33
- s.favs_count = info.find_with('Favs:').parse_int
34
- s.follows_count = info.find_with('Follows:').parse_int
35
- s.updated_date = Date.parse info.find_with('Updated:')
36
- s.published_date = Date.parse info.find_with('Published:')
37
- s.info_id = info.find_with('id:').to_i
35
+ s.rating = info.find_with 'Rated: Fiction'
36
+ s.language = info[1]
37
+ s.genres = info[2].split('/')
38
+ s.characters = info[3].strip
39
+ s.chapter_count = info.find_with('Chapters:').as { |c| c.parse_int unless c.nil? }
40
+ s.word_count = info.find_with('Words:').parse_int
41
+ s.review_count = info.find_with('Reviews:').parse_int
42
+ s.favs_count = info.find_with('Favs:').parse_int
43
+ s.follows_count = info.find_with('Follows:').parse_int
44
+
45
+ s.updated_date = info.find_with('Updated:').as do |d|
46
+ begin
47
+ Date.strptime(d, '%m/%d/%Y')
48
+ rescue
49
+ Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
50
+ end if d
51
+ end
52
+
53
+ s.published_date = info.find_with('Published:').as do |d|
54
+ begin
55
+ Date.strptime(d, '%m/%d/%Y')
56
+ rescue
57
+ Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
58
+ end
59
+ end
60
+
61
+ s.info_id = info.find_with('id:').to_i
38
62
 
39
63
  raise(Exception.new("Error! StoryID and parsed ID don't match.")) if s.info_id != storyid
40
64
 
@@ -46,14 +70,14 @@ module Ficrip
46
70
  if chapter_select
47
71
  s.chapters = chapter_select.children.map(&:text)
48
72
  else
49
- s.chapters = "1. #{title}"
73
+ s.chapters = ["1. #{title}"]
50
74
  end
51
75
  end
52
76
  end
53
77
 
54
78
  Contract Integer => GEPUB::Book
55
79
  def self.get(storyid, version: 3)
56
- fetch(storyid).bind(version)
80
+ fetch(storyid).bind(version: version)
57
81
  end
58
82
 
59
83
  end
data/lib/ficrip/story.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require 'contracts'
2
2
  require 'open-uri'
3
+ require 'retryable'
4
+ require 'gepub'
5
+
3
6
  require_relative 'extensions'
4
7
 
5
8
  module Ficrip
@@ -7,7 +10,7 @@ module Ficrip
7
10
  include Contracts::Core
8
11
  include Contracts::Builtin
9
12
 
10
- attr_accessor :title, :author, :metadata
13
+ attr_accessor :title, :author, :url, :metadata
11
14
 
12
15
  DOCTYPE = {
13
16
  3 => '<!DOCTYPE html>',
@@ -21,11 +24,6 @@ module Ficrip
21
24
  @metadata = metadata
22
25
  end
23
26
 
24
- def construct
25
- yield self
26
- self
27
- end
28
-
29
27
  def self.construct(title, author, url)
30
28
  s = Story::new(title, author, url)
31
29
  yield s
@@ -43,7 +41,14 @@ module Ficrip
43
41
  end
44
42
  end
45
43
 
44
+ def respond_to_missing?(method_sym, include_private = false)
45
+ @metadata.key?(method_sym) ||
46
+ (method_sym != :title= && method_sym != :author= && method_sym.to_s.end_with?('=')) ||
47
+ super
48
+ end
49
+
46
50
  Contract Symbol, String => Story
51
+
47
52
  def add_metadata(key, value)
48
53
  @metadata[key] = value
49
54
  self
@@ -51,8 +56,6 @@ module Ficrip
51
56
 
52
57
  # Contract { version: Maybe[Or[2, 3]] }
53
58
  def bind(version: 3, callback: nil)
54
-
55
-
56
59
  book = GEPUB::Book.new('OEPBS/package.opf', 'version' => version.to_f.to_s)
57
60
  book.primary_identifier(@url, 'BookId', 'URL')
58
61
 
@@ -62,7 +65,7 @@ module Ficrip
62
65
 
63
66
  # Cover if it exists
64
67
  if @metadata.key? :cover_url
65
- cover = open(@metadata[:cover_url], 'Referer' => @url)
68
+ cover = open!(@metadata[:cover_url], 'Referer' => @url)
66
69
  cover_type = FastImage.type(cover)
67
70
  book.add_item(format('img/cover_image.%s', cover_type), cover)
68
71
  .cover_image
@@ -91,18 +94,49 @@ module Ficrip
91
94
  XHTML
92
95
  end
93
96
 
97
+ titlepage = <<-XHTML.strip_heredoc
98
+ <?xml version="1.0" encoding="utf-8"?>
99
+ #{DOCTYPE[version]}
100
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
101
+ <head>
102
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
103
+ <title>#{@title}</title>
104
+ <style type="text/css" title="override_css">
105
+ .outer { display: table; height: 75%; width: 100%; }
106
+ .middle { display: table-cell; vertical-align: middle; }
107
+ .inner { text-align: center; }
108
+ </style>
109
+ </head>
110
+ <body>
111
+ <div class="outer"><div class="middle"><div class="inner">
112
+ <h1>#{@title}</h1>
113
+ <h3>#{@author}</h3>
114
+ </div></div></div>
115
+ </body>
116
+ </html>
117
+ XHTML
118
+
119
+
120
+ table_of_contents = nil
94
121
  book.ordered do
95
122
  book.add_item('img/coverpage.xhtml')
96
123
  .add_content(StringIO.new(coverpage))
97
124
  .toc_text(@title) if @metadata.key? :cover_url
98
125
 
99
- unless @metadata[:chapters]
100
- @metadata[:chapters] = @url
101
- end
126
+ book.add_item('text/titlepage.xhtml')
127
+ .add_content(StringIO.new(Nokogiri::XML(titlepage) { |c| c.noblanks }.to_xhtml(indent: 2)))
128
+
129
+ book.add_item('text/infopage.xhtml')
130
+ .add_content(StringIO.new(Nokogiri::XML(render_metadata) { |c| c.noblanks }.to_xhtml(indent: 2)))
131
+ .toc_text('About')
132
+
133
+ # We want our TOC to be after the cover and titlepage, but we don't have any content
134
+ # for it yet, so we save it for later.
135
+ table_of_contents = book.add_item('text/toc.xhtml').toc_text('Table of Contents')
102
136
 
103
137
  chapters.each do |chapter|
104
138
  chapter_num, chapter_title = chapter.match(/^(\d+)\s*[-\\.)]?\s+(.*)/).captures
105
- chapter_page = Nokogiri::HTML open(URI.join(@url, chapter_num))
139
+ chapter_page = Nokogiri::HTML open!(URI.join(@url, chapter_num))
106
140
 
107
141
  storytext = chapter_page.css('#storytext').first
108
142
  storytext.xpath('//@noshade').remove
@@ -120,20 +154,104 @@ module Ficrip
120
154
  #{'<section epub:type="chapter">' if version == 3}
121
155
  <h1 style="text-align:center">#{chapter_title}</h1>
122
156
  #{storytext.children.to_xhtml}
123
- #{'</section>' if version == 3}
157
+ #{'</section>' if version == 3}
124
158
  </body>
125
159
  </html>
126
160
  XHTML
127
161
 
128
- book.add_item(format('text/chapter%03d.xhtml', chapter_num),
129
- nil, "c#{chapter_num}")
130
- .add_content(StringIO.new(Nokogiri::XML(chapter_xhtml){|c| c.noblanks}.to_xhtml(indent:2)))
162
+ book.add_item(format('text/chapter%03d.xhtml', chapter_num), nil, "c#{chapter_num}")
163
+ .add_content(StringIO.new(Nokogiri::XML(chapter_xhtml) { |c| c.noblanks }.to_xhtml(indent: 2)))
131
164
  .toc_text(chapter_title)
132
165
 
133
- callback.call if callback
166
+ if callback
167
+ args = [chapter_num.to_i, chapters.count]
168
+ n = callback.arity
169
+ callback.call *(n < 0 ? args : args.take(n))
170
+ end
134
171
  end
135
172
  end
173
+
174
+ # This generates a proper Table of Contents page at the start of the book by
175
+ # removing references to the cover and TOC itself
176
+ book_copy = book.deep_clone
177
+ cut_idx = @metadata.key?(:cover_url) ? 3 : 2
178
+ book_copy.instance_variable_set(:@toc, book_copy.instance_variable_get(:@toc)[cut_idx..-1])
179
+ table_of_contents.add_content(StringIO.new(book_copy.nav_doc)) # Finally, we get to add the actual content
180
+
181
+ # Now that we've generated the TOC page, go through every chapter reference in the
182
+ # toc and prepend the chapter number. This is for the epub's built-in table of contents
183
+ book.instance_variable_get(:@toc)[cut_idx..-1].each_with_index do |chap, idx|
184
+ chap[:text] = "#{idx + 1}. #{chap[:text]}"
185
+ end
186
+
187
+ add_item('nav.html', StringIO.new(book_copy.nav_doc), 'nav').add_property('nav') if version == 3
188
+
136
189
  book
137
190
  end
191
+
192
+ def render_metadata
193
+ data = {
194
+ 'Rating' => rating,
195
+ 'Language' => language,
196
+ 'Genres' => genres.join(', '),
197
+ 'Characters/Pairings' => characters,
198
+ 'Chapter count' => format_num(chapter_count),
199
+ 'Word count' => format_num(word_count),
200
+ 'Reviews' => "<a href='https://fanfiction.com/r/#{info_id}'>" + format_num(review_count) + '</a>',
201
+ 'Favorites' => format_num(favs_count),
202
+ 'Follows' => format_num(follows_count),
203
+ 'Updated' => updated_date,
204
+ 'Published' => published_date,
205
+ 'ID' => info_id
206
+ }
207
+
208
+ Nokogiri::XML::Builder.new(encoding: 'utf-8') { |doc|
209
+ doc.html('xmlns' => 'http://www.w3.org/1999/xhtml', 'xml:lang' => 'en') {
210
+ doc.head {
211
+ doc.meta 'http-equiv' => 'Content-Type', 'content' => 'text/html; charset=UTF-8'
212
+ doc.title 'About'
213
+ }
214
+ doc.body {
215
+ doc.p { doc.strong 'Author: '; doc.a(href: author_url) { doc.text @author } }
216
+ doc.p { doc.strong 'Summary:'; doc.br; doc.text summary }
217
+ data.each do |k, v|
218
+ doc.span {
219
+ doc.strong(k + ':')
220
+ (v.to_s.start_with? '<a') ? doc << v.to_s : doc.text(' ' + v.to_s)
221
+ doc.br
222
+ }
223
+ end
224
+ }
225
+ }
226
+ }.to_xml
227
+ end
228
+
229
+ private
230
+ def open!(*args, &block)
231
+ Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
232
+ open(*args, &block)
233
+ end
234
+ end
235
+
236
+ def handle_url(string)
237
+ if string.start_with? 'https://www.fanfiction.net/u/'
238
+ "<a href='#{string}'>"
239
+ end
240
+ end
241
+
242
+ # # File activesupport/lib/active_support/inflector/methods.rb, line 123
243
+ # def humanize(word, capitalize: false)
244
+ # result = word.to_s.dup
245
+ # result.sub!(/\A_+/, ''.freeze)
246
+ # result.sub!(/_id\z/, ''.freeze)
247
+ # result.tr!('_'.freeze, ' '.freeze)
248
+ # result.gsub!(/([a-z\d]*)/i) { |match| match.downcase }
249
+ # result.sub!(/\A\w/) { |match| match.upcase } if capitalize
250
+ # result
251
+ # end
252
+
253
+ def format_num(num)
254
+ num.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse
255
+ end
138
256
  end
139
- end
257
+ end
@@ -1,3 +1,3 @@
1
1
  module Ficrip
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ficrip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Katherine Whitlock
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-21 00:00:00.000000000 Z
11
+ date: 2016-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -109,33 +109,61 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.8'
111
111
  - !ruby/object:Gem::Dependency
112
- name: bundler
112
+ name: chronic_duration
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '1.12'
118
- type: :development
117
+ version: '0.10'
118
+ type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '1.12'
124
+ version: '0.10'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rake
126
+ name: retryable
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '11.2'
132
- type: :development
131
+ version: '2.0'
132
+ type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '11.2'
138
+ version: '2.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: bundler
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rake
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
139
167
  - !ruby/object:Gem::Dependency
140
168
  name: rspec
141
169
  requirement: !ruby/object:Gem::Requirement