ficrip 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ficrip +2 -1
- data/ficrip.gemspec +4 -2
- data/lib/ficrip/extensions.rb +29 -1
- data/lib/ficrip/process.rb +41 -17
- data/lib/ficrip/story.rb +137 -19
- data/lib/ficrip/version.rb +1 -1
- metadata +38 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a777da71d255f1d0dd702848a84d4442f5e8f54
|
4
|
+
data.tar.gz: e7b03da00e5e7aa55fda7c3b2771bb60c4061eb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d22c5f1d7d942af2c2f0eaacce64877b32b816e209215ac4818e26e730d809422419a60322fce4b8e7e5ef60f8b93e2cc5639d5fa496028b71421da72a772206
|
7
|
+
data.tar.gz: c0e711f7c43c3e826f16b274384efb5f09d41d27358447f9e5598899b769e4911a20d12950a2f83fcfe9f3c20faa1b64a2311b500366836871c091521ba28d51
|
data/bin/ficrip
CHANGED
@@ -53,7 +53,8 @@ storyids.each_with_index do |storyid, idx|
|
|
53
53
|
progressbar.total = fic.chapters.count
|
54
54
|
progressbar.format = prefix + "#{author_title} | %a [%B] %p%% (%c/%C)"
|
55
55
|
|
56
|
-
|
56
|
+
|
57
|
+
epub = fic.bind(version: opts[:epub2] ? 2 : 3, callback: lambda { progressbar.increment })
|
57
58
|
|
58
59
|
progressbar.format = prefix + "#{author_title}... Binding"
|
59
60
|
filename = File.join(Dir.pwd, "#{author_title}.epub")
|
data/ficrip.gemspec
CHANGED
@@ -24,8 +24,10 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency 'contracts', '~> 0.14'
|
25
25
|
spec.add_dependency 'i18n_data', '~> 0.7'
|
26
26
|
spec.add_dependency 'fastimage', '~> 1.8'
|
27
|
+
spec.add_dependency 'chronic_duration', '~> 0.10'
|
28
|
+
spec.add_dependency 'retryable', '~> 2.0'
|
27
29
|
|
28
|
-
spec.add_development_dependency 'bundler'
|
29
|
-
spec.add_development_dependency 'rake'
|
30
|
+
spec.add_development_dependency 'bundler'
|
31
|
+
spec.add_development_dependency 'rake'
|
30
32
|
spec.add_development_dependency 'rspec', '~> 3.5'
|
31
33
|
end
|
data/lib/ficrip/extensions.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
class Array
|
2
2
|
def find_with(str)
|
3
|
-
find { |i| i.start_with? str }
|
3
|
+
r = find { |i| i.start_with? str }
|
4
|
+
r.gsub(str, '').strip if r
|
4
5
|
end
|
5
6
|
end
|
6
7
|
|
@@ -14,3 +15,30 @@ class String
|
|
14
15
|
gsub(/^[ \t]{#{indent}}/, '')
|
15
16
|
end
|
16
17
|
end
|
18
|
+
|
19
|
+
class Object
|
20
|
+
def as
|
21
|
+
yield self
|
22
|
+
end
|
23
|
+
|
24
|
+
# From http://stackoverflow.com/a/8206537
|
25
|
+
def deep_clone
|
26
|
+
return @deep_cloning_obj if @deep_cloning
|
27
|
+
@deep_cloning_obj = clone
|
28
|
+
@deep_cloning_obj.instance_variables.each do |var|
|
29
|
+
val = @deep_cloning_obj.instance_variable_get(var)
|
30
|
+
begin
|
31
|
+
@deep_cloning = true
|
32
|
+
val = val.deep_clone
|
33
|
+
rescue TypeError
|
34
|
+
next
|
35
|
+
ensure
|
36
|
+
@deep_cloning = false
|
37
|
+
end
|
38
|
+
@deep_cloning_obj.instance_variable_set(var, val)
|
39
|
+
end
|
40
|
+
deep_cloning_obj = @deep_cloning_obj
|
41
|
+
@deep_cloning_obj = nil
|
42
|
+
deep_cloning_obj
|
43
|
+
end
|
44
|
+
end
|
data/lib/ficrip/process.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
require 'contracts'
|
2
|
-
require '
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
# Helpers
|
3
5
|
require 'i18n_data'
|
4
6
|
require 'fastimage'
|
5
|
-
require '
|
7
|
+
require 'chronic_duration'
|
8
|
+
require 'retryable'
|
9
|
+
|
6
10
|
require_relative 'extensions'
|
7
11
|
|
8
12
|
module Ficrip
|
@@ -12,7 +16,12 @@ module Ficrip
|
|
12
16
|
Contract Integer => Story
|
13
17
|
def self.fetch(storyid)
|
14
18
|
base_url = "https://www.fanfiction.net/s/#{storyid}/"
|
15
|
-
|
19
|
+
|
20
|
+
primary_page = Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
|
21
|
+
Nokogiri::HTML open(base_url)
|
22
|
+
end
|
23
|
+
|
24
|
+
raise(ArgumentError.new("Invalid StoryID #{storyid}")) if primary_page.css('#profile_top').count == 0
|
16
25
|
|
17
26
|
title = primary_page.css('#profile_top > b').first.text
|
18
27
|
author = primary_page.css('#profile_top > a').first.text
|
@@ -23,18 +32,33 @@ module Ficrip
|
|
23
32
|
|
24
33
|
info = primary_page.css('#profile_top > span.xgray.xcontrast_txt').text.split(' - ')
|
25
34
|
|
26
|
-
s.rating
|
27
|
-
s.language
|
28
|
-
s.genres
|
29
|
-
s.characters
|
30
|
-
s.chapter_count
|
31
|
-
s.word_count
|
32
|
-
s.review_count
|
33
|
-
s.favs_count
|
34
|
-
s.follows_count
|
35
|
-
|
36
|
-
s.
|
37
|
-
|
35
|
+
s.rating = info.find_with 'Rated: Fiction'
|
36
|
+
s.language = info[1]
|
37
|
+
s.genres = info[2].split('/')
|
38
|
+
s.characters = info[3].strip
|
39
|
+
s.chapter_count = info.find_with('Chapters:').as { |c| c.parse_int unless c.nil? }
|
40
|
+
s.word_count = info.find_with('Words:').parse_int
|
41
|
+
s.review_count = info.find_with('Reviews:').parse_int
|
42
|
+
s.favs_count = info.find_with('Favs:').parse_int
|
43
|
+
s.follows_count = info.find_with('Follows:').parse_int
|
44
|
+
|
45
|
+
s.updated_date = info.find_with('Updated:').as do |d|
|
46
|
+
begin
|
47
|
+
Date.strptime(d, '%m/%d/%Y')
|
48
|
+
rescue
|
49
|
+
Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
|
50
|
+
end if d
|
51
|
+
end
|
52
|
+
|
53
|
+
s.published_date = info.find_with('Published:').as do |d|
|
54
|
+
begin
|
55
|
+
Date.strptime(d, '%m/%d/%Y')
|
56
|
+
rescue
|
57
|
+
Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
s.info_id = info.find_with('id:').to_i
|
38
62
|
|
39
63
|
raise(Exception.new("Error! StoryID and parsed ID don't match.")) if s.info_id != storyid
|
40
64
|
|
@@ -46,14 +70,14 @@ module Ficrip
|
|
46
70
|
if chapter_select
|
47
71
|
s.chapters = chapter_select.children.map(&:text)
|
48
72
|
else
|
49
|
-
s.chapters = "1. #{title}"
|
73
|
+
s.chapters = ["1. #{title}"]
|
50
74
|
end
|
51
75
|
end
|
52
76
|
end
|
53
77
|
|
54
78
|
Contract Integer => GEPUB::Book
|
55
79
|
def self.get(storyid, version: 3)
|
56
|
-
fetch(storyid).bind(version)
|
80
|
+
fetch(storyid).bind(version: version)
|
57
81
|
end
|
58
82
|
|
59
83
|
end
|
data/lib/ficrip/story.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
require 'contracts'
|
2
2
|
require 'open-uri'
|
3
|
+
require 'retryable'
|
4
|
+
require 'gepub'
|
5
|
+
|
3
6
|
require_relative 'extensions'
|
4
7
|
|
5
8
|
module Ficrip
|
@@ -7,7 +10,7 @@ module Ficrip
|
|
7
10
|
include Contracts::Core
|
8
11
|
include Contracts::Builtin
|
9
12
|
|
10
|
-
attr_accessor :title, :author, :metadata
|
13
|
+
attr_accessor :title, :author, :url, :metadata
|
11
14
|
|
12
15
|
DOCTYPE = {
|
13
16
|
3 => '<!DOCTYPE html>',
|
@@ -21,11 +24,6 @@ module Ficrip
|
|
21
24
|
@metadata = metadata
|
22
25
|
end
|
23
26
|
|
24
|
-
def construct
|
25
|
-
yield self
|
26
|
-
self
|
27
|
-
end
|
28
|
-
|
29
27
|
def self.construct(title, author, url)
|
30
28
|
s = Story::new(title, author, url)
|
31
29
|
yield s
|
@@ -43,7 +41,14 @@ module Ficrip
|
|
43
41
|
end
|
44
42
|
end
|
45
43
|
|
44
|
+
def respond_to_missing?(method_sym, include_private = false)
|
45
|
+
@metadata.key?(method_sym) ||
|
46
|
+
(method_sym != :title= && method_sym != :author= && method_sym.to_s.end_with?('=')) ||
|
47
|
+
super
|
48
|
+
end
|
49
|
+
|
46
50
|
Contract Symbol, String => Story
|
51
|
+
|
47
52
|
def add_metadata(key, value)
|
48
53
|
@metadata[key] = value
|
49
54
|
self
|
@@ -51,8 +56,6 @@ module Ficrip
|
|
51
56
|
|
52
57
|
# Contract { version: Maybe[Or[2, 3]] }
|
53
58
|
def bind(version: 3, callback: nil)
|
54
|
-
|
55
|
-
|
56
59
|
book = GEPUB::Book.new('OEPBS/package.opf', 'version' => version.to_f.to_s)
|
57
60
|
book.primary_identifier(@url, 'BookId', 'URL')
|
58
61
|
|
@@ -62,7 +65,7 @@ module Ficrip
|
|
62
65
|
|
63
66
|
# Cover if it exists
|
64
67
|
if @metadata.key? :cover_url
|
65
|
-
cover
|
68
|
+
cover = open!(@metadata[:cover_url], 'Referer' => @url)
|
66
69
|
cover_type = FastImage.type(cover)
|
67
70
|
book.add_item(format('img/cover_image.%s', cover_type), cover)
|
68
71
|
.cover_image
|
@@ -91,18 +94,49 @@ module Ficrip
|
|
91
94
|
XHTML
|
92
95
|
end
|
93
96
|
|
97
|
+
titlepage = <<-XHTML.strip_heredoc
|
98
|
+
<?xml version="1.0" encoding="utf-8"?>
|
99
|
+
#{DOCTYPE[version]}
|
100
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
101
|
+
<head>
|
102
|
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
103
|
+
<title>#{@title}</title>
|
104
|
+
<style type="text/css" title="override_css">
|
105
|
+
.outer { display: table; height: 75%; width: 100%; }
|
106
|
+
.middle { display: table-cell; vertical-align: middle; }
|
107
|
+
.inner { text-align: center; }
|
108
|
+
</style>
|
109
|
+
</head>
|
110
|
+
<body>
|
111
|
+
<div class="outer"><div class="middle"><div class="inner">
|
112
|
+
<h1>#{@title}</h1>
|
113
|
+
<h3>#{@author}</h3>
|
114
|
+
</div></div></div>
|
115
|
+
</body>
|
116
|
+
</html>
|
117
|
+
XHTML
|
118
|
+
|
119
|
+
|
120
|
+
table_of_contents = nil
|
94
121
|
book.ordered do
|
95
122
|
book.add_item('img/coverpage.xhtml')
|
96
123
|
.add_content(StringIO.new(coverpage))
|
97
124
|
.toc_text(@title) if @metadata.key? :cover_url
|
98
125
|
|
99
|
-
|
100
|
-
|
101
|
-
|
126
|
+
book.add_item('text/titlepage.xhtml')
|
127
|
+
.add_content(StringIO.new(Nokogiri::XML(titlepage) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
128
|
+
|
129
|
+
book.add_item('text/infopage.xhtml')
|
130
|
+
.add_content(StringIO.new(Nokogiri::XML(render_metadata) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
131
|
+
.toc_text('About')
|
132
|
+
|
133
|
+
# We want our TOC to be after the cover and titlepage, but we don't have any content
|
134
|
+
# for it yet, so we save it for later.
|
135
|
+
table_of_contents = book.add_item('text/toc.xhtml').toc_text('Table of Contents')
|
102
136
|
|
103
137
|
chapters.each do |chapter|
|
104
138
|
chapter_num, chapter_title = chapter.match(/^(\d+)\s*[-\\.)]?\s+(.*)/).captures
|
105
|
-
chapter_page = Nokogiri::HTML open(URI.join(@url, chapter_num))
|
139
|
+
chapter_page = Nokogiri::HTML open!(URI.join(@url, chapter_num))
|
106
140
|
|
107
141
|
storytext = chapter_page.css('#storytext').first
|
108
142
|
storytext.xpath('//@noshade').remove
|
@@ -120,20 +154,104 @@ module Ficrip
|
|
120
154
|
#{'<section epub:type="chapter">' if version == 3}
|
121
155
|
<h1 style="text-align:center">#{chapter_title}</h1>
|
122
156
|
#{storytext.children.to_xhtml}
|
123
|
-
|
157
|
+
#{'</section>' if version == 3}
|
124
158
|
</body>
|
125
159
|
</html>
|
126
160
|
XHTML
|
127
161
|
|
128
|
-
book.add_item(format('text/chapter%03d.xhtml', chapter_num),
|
129
|
-
|
130
|
-
.add_content(StringIO.new(Nokogiri::XML(chapter_xhtml){|c| c.noblanks}.to_xhtml(indent:2)))
|
162
|
+
book.add_item(format('text/chapter%03d.xhtml', chapter_num), nil, "c#{chapter_num}")
|
163
|
+
.add_content(StringIO.new(Nokogiri::XML(chapter_xhtml) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
131
164
|
.toc_text(chapter_title)
|
132
165
|
|
133
|
-
|
166
|
+
if callback
|
167
|
+
args = [chapter_num.to_i, chapters.count]
|
168
|
+
n = callback.arity
|
169
|
+
callback.call *(n < 0 ? args : args.take(n))
|
170
|
+
end
|
134
171
|
end
|
135
172
|
end
|
173
|
+
|
174
|
+
# This generates a proper Table of Contents page at the start of the book by
|
175
|
+
# removing references to the cover and TOC itself
|
176
|
+
book_copy = book.deep_clone
|
177
|
+
cut_idx = @metadata.key?(:cover_url) ? 3 : 2
|
178
|
+
book_copy.instance_variable_set(:@toc, book_copy.instance_variable_get(:@toc)[cut_idx..-1])
|
179
|
+
table_of_contents.add_content(StringIO.new(book_copy.nav_doc)) # Finally, we get to add the actual content
|
180
|
+
|
181
|
+
# Now that we've generated the TOC page, go through every chapter reference in the
|
182
|
+
# toc and prepend the chapter number. This is for the epub's built-in table of contents
|
183
|
+
book.instance_variable_get(:@toc)[cut_idx..-1].each_with_index do |chap, idx|
|
184
|
+
chap[:text] = "#{idx + 1}. #{chap[:text]}"
|
185
|
+
end
|
186
|
+
|
187
|
+
add_item('nav.html', StringIO.new(book_copy.nav_doc), 'nav').add_property('nav') if version == 3
|
188
|
+
|
136
189
|
book
|
137
190
|
end
|
191
|
+
|
192
|
+
def render_metadata
|
193
|
+
data = {
|
194
|
+
'Rating' => rating,
|
195
|
+
'Language' => language,
|
196
|
+
'Genres' => genres.join(', '),
|
197
|
+
'Characters/Pairings' => characters,
|
198
|
+
'Chapter count' => format_num(chapter_count),
|
199
|
+
'Word count' => format_num(word_count),
|
200
|
+
'Reviews' => "<a href='https://fanfiction.com/r/#{info_id}'>" + format_num(review_count) + '</a>',
|
201
|
+
'Favorites' => format_num(favs_count),
|
202
|
+
'Follows' => format_num(follows_count),
|
203
|
+
'Updated' => updated_date,
|
204
|
+
'Published' => published_date,
|
205
|
+
'ID' => info_id
|
206
|
+
}
|
207
|
+
|
208
|
+
Nokogiri::XML::Builder.new(encoding: 'utf-8') { |doc|
|
209
|
+
doc.html('xmlns' => 'http://www.w3.org/1999/xhtml', 'xml:lang' => 'en') {
|
210
|
+
doc.head {
|
211
|
+
doc.meta 'http-equiv' => 'Content-Type', 'content' => 'text/html; charset=UTF-8'
|
212
|
+
doc.title 'About'
|
213
|
+
}
|
214
|
+
doc.body {
|
215
|
+
doc.p { doc.strong 'Author: '; doc.a(href: author_url) { doc.text @author } }
|
216
|
+
doc.p { doc.strong 'Summary:'; doc.br; doc.text summary }
|
217
|
+
data.each do |k, v|
|
218
|
+
doc.span {
|
219
|
+
doc.strong(k + ':')
|
220
|
+
(v.to_s.start_with? '<a') ? doc << v.to_s : doc.text(' ' + v.to_s)
|
221
|
+
doc.br
|
222
|
+
}
|
223
|
+
end
|
224
|
+
}
|
225
|
+
}
|
226
|
+
}.to_xml
|
227
|
+
end
|
228
|
+
|
229
|
+
private
|
230
|
+
def open!(*args, &block)
|
231
|
+
Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
|
232
|
+
open(*args, &block)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
def handle_url(string)
|
237
|
+
if string.start_with? 'https://www.fanfiction.net/u/'
|
238
|
+
"<a href='#{string}'>"
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# # File activesupport/lib/active_support/inflector/methods.rb, line 123
|
243
|
+
# def humanize(word, capitalize: false)
|
244
|
+
# result = word.to_s.dup
|
245
|
+
# result.sub!(/\A_+/, ''.freeze)
|
246
|
+
# result.sub!(/_id\z/, ''.freeze)
|
247
|
+
# result.tr!('_'.freeze, ' '.freeze)
|
248
|
+
# result.gsub!(/([a-z\d]*)/i) { |match| match.downcase }
|
249
|
+
# result.sub!(/\A\w/) { |match| match.upcase } if capitalize
|
250
|
+
# result
|
251
|
+
# end
|
252
|
+
|
253
|
+
def format_num(num)
|
254
|
+
num.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse
|
255
|
+
end
|
138
256
|
end
|
139
|
-
end
|
257
|
+
end
|
data/lib/ficrip/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ficrip
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Katherine Whitlock
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -109,33 +109,61 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.8'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: chronic_duration
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
118
|
-
type: :
|
117
|
+
version: '0.10'
|
118
|
+
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
124
|
+
version: '0.10'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: retryable
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
132
|
-
type: :
|
131
|
+
version: '2.0'
|
132
|
+
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
138
|
+
version: '2.0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: bundler
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rake
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: rspec
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|