ficrip 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ficrip +2 -1
- data/ficrip.gemspec +4 -2
- data/lib/ficrip/extensions.rb +29 -1
- data/lib/ficrip/process.rb +41 -17
- data/lib/ficrip/story.rb +137 -19
- data/lib/ficrip/version.rb +1 -1
- metadata +38 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a777da71d255f1d0dd702848a84d4442f5e8f54
|
4
|
+
data.tar.gz: e7b03da00e5e7aa55fda7c3b2771bb60c4061eb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d22c5f1d7d942af2c2f0eaacce64877b32b816e209215ac4818e26e730d809422419a60322fce4b8e7e5ef60f8b93e2cc5639d5fa496028b71421da72a772206
|
7
|
+
data.tar.gz: c0e711f7c43c3e826f16b274384efb5f09d41d27358447f9e5598899b769e4911a20d12950a2f83fcfe9f3c20faa1b64a2311b500366836871c091521ba28d51
|
data/bin/ficrip
CHANGED
@@ -53,7 +53,8 @@ storyids.each_with_index do |storyid, idx|
|
|
53
53
|
progressbar.total = fic.chapters.count
|
54
54
|
progressbar.format = prefix + "#{author_title} | %a [%B] %p%% (%c/%C)"
|
55
55
|
|
56
|
-
|
56
|
+
|
57
|
+
epub = fic.bind(version: opts[:epub2] ? 2 : 3, callback: lambda { progressbar.increment })
|
57
58
|
|
58
59
|
progressbar.format = prefix + "#{author_title}... Binding"
|
59
60
|
filename = File.join(Dir.pwd, "#{author_title}.epub")
|
data/ficrip.gemspec
CHANGED
@@ -24,8 +24,10 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency 'contracts', '~> 0.14'
|
25
25
|
spec.add_dependency 'i18n_data', '~> 0.7'
|
26
26
|
spec.add_dependency 'fastimage', '~> 1.8'
|
27
|
+
spec.add_dependency 'chronic_duration', '~> 0.10'
|
28
|
+
spec.add_dependency 'retryable', '~> 2.0'
|
27
29
|
|
28
|
-
spec.add_development_dependency 'bundler'
|
29
|
-
spec.add_development_dependency 'rake'
|
30
|
+
spec.add_development_dependency 'bundler'
|
31
|
+
spec.add_development_dependency 'rake'
|
30
32
|
spec.add_development_dependency 'rspec', '~> 3.5'
|
31
33
|
end
|
data/lib/ficrip/extensions.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
class Array
|
2
2
|
def find_with(str)
|
3
|
-
find { |i| i.start_with? str }
|
3
|
+
r = find { |i| i.start_with? str }
|
4
|
+
r.gsub(str, '').strip if r
|
4
5
|
end
|
5
6
|
end
|
6
7
|
|
@@ -14,3 +15,30 @@ class String
|
|
14
15
|
gsub(/^[ \t]{#{indent}}/, '')
|
15
16
|
end
|
16
17
|
end
|
18
|
+
|
19
|
+
class Object
|
20
|
+
def as
|
21
|
+
yield self
|
22
|
+
end
|
23
|
+
|
24
|
+
# From http://stackoverflow.com/a/8206537
|
25
|
+
def deep_clone
|
26
|
+
return @deep_cloning_obj if @deep_cloning
|
27
|
+
@deep_cloning_obj = clone
|
28
|
+
@deep_cloning_obj.instance_variables.each do |var|
|
29
|
+
val = @deep_cloning_obj.instance_variable_get(var)
|
30
|
+
begin
|
31
|
+
@deep_cloning = true
|
32
|
+
val = val.deep_clone
|
33
|
+
rescue TypeError
|
34
|
+
next
|
35
|
+
ensure
|
36
|
+
@deep_cloning = false
|
37
|
+
end
|
38
|
+
@deep_cloning_obj.instance_variable_set(var, val)
|
39
|
+
end
|
40
|
+
deep_cloning_obj = @deep_cloning_obj
|
41
|
+
@deep_cloning_obj = nil
|
42
|
+
deep_cloning_obj
|
43
|
+
end
|
44
|
+
end
|
data/lib/ficrip/process.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
require 'contracts'
|
2
|
-
require '
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
# Helpers
|
3
5
|
require 'i18n_data'
|
4
6
|
require 'fastimage'
|
5
|
-
require '
|
7
|
+
require 'chronic_duration'
|
8
|
+
require 'retryable'
|
9
|
+
|
6
10
|
require_relative 'extensions'
|
7
11
|
|
8
12
|
module Ficrip
|
@@ -12,7 +16,12 @@ module Ficrip
|
|
12
16
|
Contract Integer => Story
|
13
17
|
def self.fetch(storyid)
|
14
18
|
base_url = "https://www.fanfiction.net/s/#{storyid}/"
|
15
|
-
|
19
|
+
|
20
|
+
primary_page = Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
|
21
|
+
Nokogiri::HTML open(base_url)
|
22
|
+
end
|
23
|
+
|
24
|
+
raise(ArgumentError.new("Invalid StoryID #{storyid}")) if primary_page.css('#profile_top').count == 0
|
16
25
|
|
17
26
|
title = primary_page.css('#profile_top > b').first.text
|
18
27
|
author = primary_page.css('#profile_top > a').first.text
|
@@ -23,18 +32,33 @@ module Ficrip
|
|
23
32
|
|
24
33
|
info = primary_page.css('#profile_top > span.xgray.xcontrast_txt').text.split(' - ')
|
25
34
|
|
26
|
-
s.rating
|
27
|
-
s.language
|
28
|
-
s.genres
|
29
|
-
s.characters
|
30
|
-
s.chapter_count
|
31
|
-
s.word_count
|
32
|
-
s.review_count
|
33
|
-
s.favs_count
|
34
|
-
s.follows_count
|
35
|
-
|
36
|
-
s.
|
37
|
-
|
35
|
+
s.rating = info.find_with 'Rated: Fiction'
|
36
|
+
s.language = info[1]
|
37
|
+
s.genres = info[2].split('/')
|
38
|
+
s.characters = info[3].strip
|
39
|
+
s.chapter_count = info.find_with('Chapters:').as { |c| c.parse_int unless c.nil? }
|
40
|
+
s.word_count = info.find_with('Words:').parse_int
|
41
|
+
s.review_count = info.find_with('Reviews:').parse_int
|
42
|
+
s.favs_count = info.find_with('Favs:').parse_int
|
43
|
+
s.follows_count = info.find_with('Follows:').parse_int
|
44
|
+
|
45
|
+
s.updated_date = info.find_with('Updated:').as do |d|
|
46
|
+
begin
|
47
|
+
Date.strptime(d, '%m/%d/%Y')
|
48
|
+
rescue
|
49
|
+
Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
|
50
|
+
end if d
|
51
|
+
end
|
52
|
+
|
53
|
+
s.published_date = info.find_with('Published:').as do |d|
|
54
|
+
begin
|
55
|
+
Date.strptime(d, '%m/%d/%Y')
|
56
|
+
rescue
|
57
|
+
Date.strptime(d, '%m/%d') rescue (Time.now - ChronicDuration.parse(d)).to_date
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
s.info_id = info.find_with('id:').to_i
|
38
62
|
|
39
63
|
raise(Exception.new("Error! StoryID and parsed ID don't match.")) if s.info_id != storyid
|
40
64
|
|
@@ -46,14 +70,14 @@ module Ficrip
|
|
46
70
|
if chapter_select
|
47
71
|
s.chapters = chapter_select.children.map(&:text)
|
48
72
|
else
|
49
|
-
s.chapters = "1. #{title}"
|
73
|
+
s.chapters = ["1. #{title}"]
|
50
74
|
end
|
51
75
|
end
|
52
76
|
end
|
53
77
|
|
54
78
|
Contract Integer => GEPUB::Book
|
55
79
|
def self.get(storyid, version: 3)
|
56
|
-
fetch(storyid).bind(version)
|
80
|
+
fetch(storyid).bind(version: version)
|
57
81
|
end
|
58
82
|
|
59
83
|
end
|
data/lib/ficrip/story.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
require 'contracts'
|
2
2
|
require 'open-uri'
|
3
|
+
require 'retryable'
|
4
|
+
require 'gepub'
|
5
|
+
|
3
6
|
require_relative 'extensions'
|
4
7
|
|
5
8
|
module Ficrip
|
@@ -7,7 +10,7 @@ module Ficrip
|
|
7
10
|
include Contracts::Core
|
8
11
|
include Contracts::Builtin
|
9
12
|
|
10
|
-
attr_accessor :title, :author, :metadata
|
13
|
+
attr_accessor :title, :author, :url, :metadata
|
11
14
|
|
12
15
|
DOCTYPE = {
|
13
16
|
3 => '<!DOCTYPE html>',
|
@@ -21,11 +24,6 @@ module Ficrip
|
|
21
24
|
@metadata = metadata
|
22
25
|
end
|
23
26
|
|
24
|
-
def construct
|
25
|
-
yield self
|
26
|
-
self
|
27
|
-
end
|
28
|
-
|
29
27
|
def self.construct(title, author, url)
|
30
28
|
s = Story::new(title, author, url)
|
31
29
|
yield s
|
@@ -43,7 +41,14 @@ module Ficrip
|
|
43
41
|
end
|
44
42
|
end
|
45
43
|
|
44
|
+
def respond_to_missing?(method_sym, include_private = false)
|
45
|
+
@metadata.key?(method_sym) ||
|
46
|
+
(method_sym != :title= && method_sym != :author= && method_sym.to_s.end_with?('=')) ||
|
47
|
+
super
|
48
|
+
end
|
49
|
+
|
46
50
|
Contract Symbol, String => Story
|
51
|
+
|
47
52
|
def add_metadata(key, value)
|
48
53
|
@metadata[key] = value
|
49
54
|
self
|
@@ -51,8 +56,6 @@ module Ficrip
|
|
51
56
|
|
52
57
|
# Contract { version: Maybe[Or[2, 3]] }
|
53
58
|
def bind(version: 3, callback: nil)
|
54
|
-
|
55
|
-
|
56
59
|
book = GEPUB::Book.new('OEPBS/package.opf', 'version' => version.to_f.to_s)
|
57
60
|
book.primary_identifier(@url, 'BookId', 'URL')
|
58
61
|
|
@@ -62,7 +65,7 @@ module Ficrip
|
|
62
65
|
|
63
66
|
# Cover if it exists
|
64
67
|
if @metadata.key? :cover_url
|
65
|
-
cover
|
68
|
+
cover = open!(@metadata[:cover_url], 'Referer' => @url)
|
66
69
|
cover_type = FastImage.type(cover)
|
67
70
|
book.add_item(format('img/cover_image.%s', cover_type), cover)
|
68
71
|
.cover_image
|
@@ -91,18 +94,49 @@ module Ficrip
|
|
91
94
|
XHTML
|
92
95
|
end
|
93
96
|
|
97
|
+
titlepage = <<-XHTML.strip_heredoc
|
98
|
+
<?xml version="1.0" encoding="utf-8"?>
|
99
|
+
#{DOCTYPE[version]}
|
100
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
101
|
+
<head>
|
102
|
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
103
|
+
<title>#{@title}</title>
|
104
|
+
<style type="text/css" title="override_css">
|
105
|
+
.outer { display: table; height: 75%; width: 100%; }
|
106
|
+
.middle { display: table-cell; vertical-align: middle; }
|
107
|
+
.inner { text-align: center; }
|
108
|
+
</style>
|
109
|
+
</head>
|
110
|
+
<body>
|
111
|
+
<div class="outer"><div class="middle"><div class="inner">
|
112
|
+
<h1>#{@title}</h1>
|
113
|
+
<h3>#{@author}</h3>
|
114
|
+
</div></div></div>
|
115
|
+
</body>
|
116
|
+
</html>
|
117
|
+
XHTML
|
118
|
+
|
119
|
+
|
120
|
+
table_of_contents = nil
|
94
121
|
book.ordered do
|
95
122
|
book.add_item('img/coverpage.xhtml')
|
96
123
|
.add_content(StringIO.new(coverpage))
|
97
124
|
.toc_text(@title) if @metadata.key? :cover_url
|
98
125
|
|
99
|
-
|
100
|
-
|
101
|
-
|
126
|
+
book.add_item('text/titlepage.xhtml')
|
127
|
+
.add_content(StringIO.new(Nokogiri::XML(titlepage) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
128
|
+
|
129
|
+
book.add_item('text/infopage.xhtml')
|
130
|
+
.add_content(StringIO.new(Nokogiri::XML(render_metadata) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
131
|
+
.toc_text('About')
|
132
|
+
|
133
|
+
# We want our TOC to be after the cover and titlepage, but we don't have any content
|
134
|
+
# for it yet, so we save it for later.
|
135
|
+
table_of_contents = book.add_item('text/toc.xhtml').toc_text('Table of Contents')
|
102
136
|
|
103
137
|
chapters.each do |chapter|
|
104
138
|
chapter_num, chapter_title = chapter.match(/^(\d+)\s*[-\\.)]?\s+(.*)/).captures
|
105
|
-
chapter_page = Nokogiri::HTML open(URI.join(@url, chapter_num))
|
139
|
+
chapter_page = Nokogiri::HTML open!(URI.join(@url, chapter_num))
|
106
140
|
|
107
141
|
storytext = chapter_page.css('#storytext').first
|
108
142
|
storytext.xpath('//@noshade').remove
|
@@ -120,20 +154,104 @@ module Ficrip
|
|
120
154
|
#{'<section epub:type="chapter">' if version == 3}
|
121
155
|
<h1 style="text-align:center">#{chapter_title}</h1>
|
122
156
|
#{storytext.children.to_xhtml}
|
123
|
-
|
157
|
+
#{'</section>' if version == 3}
|
124
158
|
</body>
|
125
159
|
</html>
|
126
160
|
XHTML
|
127
161
|
|
128
|
-
book.add_item(format('text/chapter%03d.xhtml', chapter_num),
|
129
|
-
|
130
|
-
.add_content(StringIO.new(Nokogiri::XML(chapter_xhtml){|c| c.noblanks}.to_xhtml(indent:2)))
|
162
|
+
book.add_item(format('text/chapter%03d.xhtml', chapter_num), nil, "c#{chapter_num}")
|
163
|
+
.add_content(StringIO.new(Nokogiri::XML(chapter_xhtml) { |c| c.noblanks }.to_xhtml(indent: 2)))
|
131
164
|
.toc_text(chapter_title)
|
132
165
|
|
133
|
-
|
166
|
+
if callback
|
167
|
+
args = [chapter_num.to_i, chapters.count]
|
168
|
+
n = callback.arity
|
169
|
+
callback.call *(n < 0 ? args : args.take(n))
|
170
|
+
end
|
134
171
|
end
|
135
172
|
end
|
173
|
+
|
174
|
+
# This generates a proper Table of Contents page at the start of the book by
|
175
|
+
# removing references to the cover and TOC itself
|
176
|
+
book_copy = book.deep_clone
|
177
|
+
cut_idx = @metadata.key?(:cover_url) ? 3 : 2
|
178
|
+
book_copy.instance_variable_set(:@toc, book_copy.instance_variable_get(:@toc)[cut_idx..-1])
|
179
|
+
table_of_contents.add_content(StringIO.new(book_copy.nav_doc)) # Finally, we get to add the actual content
|
180
|
+
|
181
|
+
# Now that we've generated the TOC page, go through every chapter reference in the
|
182
|
+
# toc and prepend the chapter number. This is for the epub's built-in table of contents
|
183
|
+
book.instance_variable_get(:@toc)[cut_idx..-1].each_with_index do |chap, idx|
|
184
|
+
chap[:text] = "#{idx + 1}. #{chap[:text]}"
|
185
|
+
end
|
186
|
+
|
187
|
+
add_item('nav.html', StringIO.new(book_copy.nav_doc), 'nav').add_property('nav') if version == 3
|
188
|
+
|
136
189
|
book
|
137
190
|
end
|
191
|
+
|
192
|
+
def render_metadata
|
193
|
+
data = {
|
194
|
+
'Rating' => rating,
|
195
|
+
'Language' => language,
|
196
|
+
'Genres' => genres.join(', '),
|
197
|
+
'Characters/Pairings' => characters,
|
198
|
+
'Chapter count' => format_num(chapter_count),
|
199
|
+
'Word count' => format_num(word_count),
|
200
|
+
'Reviews' => "<a href='https://fanfiction.com/r/#{info_id}'>" + format_num(review_count) + '</a>',
|
201
|
+
'Favorites' => format_num(favs_count),
|
202
|
+
'Follows' => format_num(follows_count),
|
203
|
+
'Updated' => updated_date,
|
204
|
+
'Published' => published_date,
|
205
|
+
'ID' => info_id
|
206
|
+
}
|
207
|
+
|
208
|
+
Nokogiri::XML::Builder.new(encoding: 'utf-8') { |doc|
|
209
|
+
doc.html('xmlns' => 'http://www.w3.org/1999/xhtml', 'xml:lang' => 'en') {
|
210
|
+
doc.head {
|
211
|
+
doc.meta 'http-equiv' => 'Content-Type', 'content' => 'text/html; charset=UTF-8'
|
212
|
+
doc.title 'About'
|
213
|
+
}
|
214
|
+
doc.body {
|
215
|
+
doc.p { doc.strong 'Author: '; doc.a(href: author_url) { doc.text @author } }
|
216
|
+
doc.p { doc.strong 'Summary:'; doc.br; doc.text summary }
|
217
|
+
data.each do |k, v|
|
218
|
+
doc.span {
|
219
|
+
doc.strong(k + ':')
|
220
|
+
(v.to_s.start_with? '<a') ? doc << v.to_s : doc.text(' ' + v.to_s)
|
221
|
+
doc.br
|
222
|
+
}
|
223
|
+
end
|
224
|
+
}
|
225
|
+
}
|
226
|
+
}.to_xml
|
227
|
+
end
|
228
|
+
|
229
|
+
private
|
230
|
+
def open!(*args, &block)
|
231
|
+
Retryable.retryable(tries: :infinite, on: OpenURI::HTTPError) do
|
232
|
+
open(*args, &block)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
def handle_url(string)
|
237
|
+
if string.start_with? 'https://www.fanfiction.net/u/'
|
238
|
+
"<a href='#{string}'>"
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# # File activesupport/lib/active_support/inflector/methods.rb, line 123
|
243
|
+
# def humanize(word, capitalize: false)
|
244
|
+
# result = word.to_s.dup
|
245
|
+
# result.sub!(/\A_+/, ''.freeze)
|
246
|
+
# result.sub!(/_id\z/, ''.freeze)
|
247
|
+
# result.tr!('_'.freeze, ' '.freeze)
|
248
|
+
# result.gsub!(/([a-z\d]*)/i) { |match| match.downcase }
|
249
|
+
# result.sub!(/\A\w/) { |match| match.upcase } if capitalize
|
250
|
+
# result
|
251
|
+
# end
|
252
|
+
|
253
|
+
def format_num(num)
|
254
|
+
num.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse
|
255
|
+
end
|
138
256
|
end
|
139
|
-
end
|
257
|
+
end
|
data/lib/ficrip/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ficrip
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Katherine Whitlock
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -109,33 +109,61 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.8'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: chronic_duration
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
118
|
-
type: :
|
117
|
+
version: '0.10'
|
118
|
+
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
124
|
+
version: '0.10'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: retryable
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
132
|
-
type: :
|
131
|
+
version: '2.0'
|
132
|
+
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
138
|
+
version: '2.0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: bundler
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rake
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: rspec
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|