kindle_manager 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/README.md +2 -2
- data/kindle_manager.gemspec +1 -1
- data/lib/kindle_manager/adapters/base_adapter.rb +1 -1
- data/lib/kindle_manager/adapters/books_adapter.rb +18 -5
- data/lib/kindle_manager/adapters/highlights_adapter.rb +1 -1
- data/lib/kindle_manager/client.rb +2 -7
- data/lib/kindle_manager/file_store.rb +9 -15
- data/lib/kindle_manager/parsers/base_parser.rb +7 -0
- data/lib/kindle_manager/parsers/books_parser.rb +6 -2
- data/lib/kindle_manager/parsers/common.rb +5 -0
- data/lib/kindle_manager/parsers/highlights_parser.rb +3 -2
- data/lib/kindle_manager/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5df40793942c890de33041d60b6e27efe076a695
|
4
|
+
data.tar.gz: 31a5cbce41c6b166de7954cb876f041fc4ddf3ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d9ac40336b50f1f6f10c201f11eef301eb11ae373e8bd9fd88c5167edd00e4cf6edc95fda3b82f2436d440d1dff8f9ca8e459708967d66fd0756086927a6b30
|
7
|
+
data.tar.gz: 315a3ae71152310075fe4d855355bd93c280af92c6d400f2bbb5ae2333941380f598b8092373f3aa3d63b0a371162e1a050407e772b437abe181a96b28b17c11
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -54,7 +54,7 @@ In console
|
|
54
54
|
|
55
55
|
```ruby
|
56
56
|
require 'kindle_manager'
|
57
|
-
client = KindleManager::Client.new(verbose: true, limit: 1000)
|
57
|
+
client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 1000)
|
58
58
|
client.fetch_kindle_list
|
59
59
|
|
60
60
|
books = client.load_kindle_books
|
@@ -89,7 +89,7 @@ In console
|
|
89
89
|
|
90
90
|
```ruby
|
91
91
|
require 'kindle_manager'
|
92
|
-
client = KindleManager::Client.new(verbose: true, limit: 10)
|
92
|
+
client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 10)
|
93
93
|
client.fetch_kindle_highlights
|
94
94
|
|
95
95
|
books = client.load_kindle_highlights
|
data/kindle_manager.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "amazon_auth", "~> 0.
|
24
|
+
spec.add_runtime_dependency "amazon_auth", "~> 0.4.0"
|
25
25
|
spec.add_development_dependency "bundler", "~> 1.14"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
27
27
|
spec.add_development_dependency "rspec", "~> 3.0"
|
@@ -10,7 +10,7 @@ module KindleManager
|
|
10
10
|
extend(AmazonAuth::SessionExtension)
|
11
11
|
|
12
12
|
@store = KindleManager::FileStore.new(options.merge(session: @session))
|
13
|
-
log "Directory for downloaded pages is #{store.
|
13
|
+
log "Directory for downloaded pages is #{store.target_dir}"
|
14
14
|
end
|
15
15
|
|
16
16
|
def limit
|
@@ -22,6 +22,8 @@ module KindleManager
|
|
22
22
|
if session.first('.navHeader_myx')
|
23
23
|
log "Page found '#{session.first('.navHeader_myx').text}'"
|
24
24
|
break
|
25
|
+
else
|
26
|
+
submit_signin_form
|
25
27
|
end
|
26
28
|
end
|
27
29
|
end
|
@@ -29,10 +31,10 @@ module KindleManager
|
|
29
31
|
def load_next_kindle_list
|
30
32
|
wait_for_selector('.contentCount_myx')
|
31
33
|
current_loop = 0
|
34
|
+
last_page_scroll_offset = page_scroll_offset
|
32
35
|
while current_loop <= max_scroll_attempts
|
33
|
-
if limit && limit < number_of_fetched_books
|
34
|
-
|
35
|
-
elsif has_more_button?
|
36
|
+
break if limit && limit < number_of_fetched_books
|
37
|
+
if has_more_button?
|
36
38
|
snapshot_page
|
37
39
|
current_loop = 0
|
38
40
|
|
@@ -46,9 +48,16 @@ module KindleManager
|
|
46
48
|
session.execute_script "window.scrollBy(0,10000)"
|
47
49
|
end
|
48
50
|
sleep fetching_interval
|
51
|
+
if last_page_scroll_offset == page_scroll_offset
|
52
|
+
log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
|
53
|
+
break
|
54
|
+
else
|
55
|
+
debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
|
56
|
+
end
|
57
|
+
last_page_scroll_offset = page_scroll_offset
|
49
58
|
current_loop += 1
|
50
59
|
end
|
51
|
-
log "Stopped loading"
|
60
|
+
log "Stopped loading. You may want to resume with 'client.adapter.load_next_kindle_list'"
|
52
61
|
snapshot_page
|
53
62
|
end
|
54
63
|
|
@@ -58,7 +67,11 @@ module KindleManager
|
|
58
67
|
parser = KindleManager::BooksParser.new(file)
|
59
68
|
books += parser.parse
|
60
69
|
end
|
61
|
-
books.uniq(&:asin)
|
70
|
+
books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
|
71
|
+
end
|
72
|
+
|
73
|
+
def page_scroll_offset
|
74
|
+
session.evaluate_script('window.pageYOffset')
|
62
75
|
end
|
63
76
|
|
64
77
|
def has_more_button?
|
@@ -100,7 +100,7 @@ module KindleManager
|
|
100
100
|
parser = KindleManager::HighlightsParser.new(file)
|
101
101
|
books += parser.parse
|
102
102
|
end
|
103
|
-
books.reject(&:invalid?).uniq(&:asin)
|
103
|
+
books.reject(&:invalid?).sort_by{|b| [-b.last_annotated_on.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
|
104
104
|
end
|
105
105
|
|
106
106
|
def snapshot_page(message = nil)
|
@@ -2,7 +2,7 @@ module KindleManager
|
|
2
2
|
class Client
|
3
3
|
include AmazonAuth::CommonExtension
|
4
4
|
|
5
|
-
attr_accessor :adapter
|
5
|
+
attr_accessor :adapter, :options
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
8
|
@options = options
|
@@ -14,10 +14,6 @@ module KindleManager
|
|
14
14
|
@_session ||= @client.session
|
15
15
|
end
|
16
16
|
|
17
|
-
def sign_in
|
18
|
-
@client.sign_in
|
19
|
-
end
|
20
|
-
|
21
17
|
def fetch_kindle_list
|
22
18
|
sign_in
|
23
19
|
set_adapter(:books, @options.merge(session: session))
|
@@ -25,8 +21,7 @@ module KindleManager
|
|
25
21
|
end
|
26
22
|
|
27
23
|
def fetch_kindle_highlights
|
28
|
-
|
29
|
-
@client.submit_signin_form
|
24
|
+
sign_in KindleManager::HighlightsAdapter::KINDLE_HIGHLIGHT_URL
|
30
25
|
set_adapter(:highlights, @options.merge(session: session))
|
31
26
|
adapter.fetch
|
32
27
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module KindleManager
|
2
2
|
class FileStore
|
3
|
-
|
3
|
+
TIME_FORMAT_FOR_FILENAME = '%Y%m%d%H%M%S'
|
4
|
+
|
5
|
+
attr_accessor :sub_dir, :dir_name, :session
|
4
6
|
|
5
7
|
def initialize(options = {})
|
6
8
|
@sub_dir = options.fetch(:sub_dir, 'books').to_s
|
@@ -11,28 +13,20 @@ module KindleManager
|
|
11
13
|
@session = options.fetch(:session, nil)
|
12
14
|
end
|
13
15
|
|
14
|
-
def
|
15
|
-
|
16
|
-
end
|
17
|
-
|
18
|
-
def root_dir
|
19
|
-
File.join(downloads_dir, @sub_dir)
|
20
|
-
end
|
21
|
-
|
22
|
-
def base_dir
|
23
|
-
File.join(root_dir, @dir_name)
|
16
|
+
def target_dir
|
17
|
+
File.join(sub_dir, dir_name)
|
24
18
|
end
|
25
19
|
|
26
20
|
def list_work_dirs
|
27
|
-
Dir[
|
21
|
+
Dir[File.join(Capybara.save_path, sub_dir,'*')].select{|f| File.directory? f }
|
28
22
|
end
|
29
23
|
|
30
24
|
def find_latest_dir_name
|
31
25
|
list_work_dirs.sort.last.to_s.split('/').last
|
32
26
|
end
|
33
27
|
|
34
|
-
def list_html_files
|
35
|
-
Dir[File.join(
|
28
|
+
def list_html_files
|
29
|
+
Dir[File.join(Capybara.save_path, target_dir,'*.html')].select{|f| File.file? f }
|
36
30
|
end
|
37
31
|
|
38
32
|
def html_path(time)
|
@@ -52,7 +46,7 @@ module KindleManager
|
|
52
46
|
private
|
53
47
|
|
54
48
|
def build_filepath(time, ext)
|
55
|
-
File.join(
|
49
|
+
File.join(target_dir, "#{time.strftime(TIME_FORMAT_FOR_FILENAME)}#{(time.usec / 1000.0).round.to_s.rjust(3,'0')}.#{ext}")
|
56
50
|
end
|
57
51
|
end
|
58
52
|
end
|
@@ -1,8 +1,15 @@
|
|
1
1
|
module KindleManager
|
2
2
|
class BaseParser
|
3
|
+
attr_accessor :fetched_at
|
3
4
|
|
4
5
|
def initialize(filepath, options = {})
|
5
6
|
@filepath = filepath
|
7
|
+
|
8
|
+
@fetched_at = if File.basename(@filepath) =~ /\A\d{14}/
|
9
|
+
Time.strptime(File.basename(@filepath)[0..14], KindleManager::FileStore::TIME_FORMAT_FOR_FILENAME)
|
10
|
+
else
|
11
|
+
File.ctime(@filepath)
|
12
|
+
end
|
6
13
|
end
|
7
14
|
|
8
15
|
def doc
|
@@ -1,10 +1,12 @@
|
|
1
1
|
module KindleManager
|
2
2
|
class BooksParser < BaseParser
|
3
3
|
class BookRow
|
4
|
+
|
4
5
|
include KindleManager::Parsers::Common
|
5
6
|
|
6
|
-
def initialize(node)
|
7
|
+
def initialize(node, options = {})
|
7
8
|
@node = node
|
9
|
+
@fetched_at = options[:fetched_at]
|
8
10
|
end
|
9
11
|
|
10
12
|
def inspect
|
@@ -45,7 +47,9 @@ module KindleManager
|
|
45
47
|
end
|
46
48
|
|
47
49
|
def parse
|
48
|
-
@_parsed ||=
|
50
|
+
@_parsed ||= begin
|
51
|
+
doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e, fetched_at: fetched_at) }
|
52
|
+
end
|
49
53
|
end
|
50
54
|
end
|
51
55
|
end
|
@@ -3,8 +3,9 @@ module KindleManager
|
|
3
3
|
class BookWithNote
|
4
4
|
include KindleManager::Parsers::Common
|
5
5
|
|
6
|
-
def initialize(node)
|
6
|
+
def initialize(node, options = {})
|
7
7
|
@node = node
|
8
|
+
@fetched_at = options[:fetched_at]
|
8
9
|
end
|
9
10
|
|
10
11
|
def inspect
|
@@ -81,7 +82,7 @@ module KindleManager
|
|
81
82
|
|
82
83
|
def parse
|
83
84
|
@_parsed ||= begin
|
84
|
-
result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
|
85
|
+
result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e, fetched_at: fetched_at) }
|
85
86
|
puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
|
86
87
|
puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
|
87
88
|
result
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kindle_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.4.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.4.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|