kindle_manager 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +66 -6
- data/lib/kindle_manager/adapters/base_adapter.rb +24 -0
- data/lib/kindle_manager/adapters/books_adapter.rb +97 -0
- data/lib/kindle_manager/adapters/highlights_adapter.rb +104 -0
- data/lib/kindle_manager/client.rb +18 -97
- data/lib/kindle_manager/file_store.rb +20 -23
- data/lib/kindle_manager/parsers/base_parser.rb +16 -0
- data/lib/kindle_manager/{list_parser.rb → parsers/books_parser.rb} +6 -24
- data/lib/kindle_manager/parsers/common.rb +16 -0
- data/lib/kindle_manager/parsers/highlights_parser.rb +91 -0
- data/lib/kindle_manager/version.rb +1 -1
- data/lib/kindle_manager.rb +7 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 289fa1e86d32bf07025ccba4417a62e78d7a68fe
|
4
|
+
data.tar.gz: 1de00446cd5f21fb37b9dfdf236a60ccc6c51065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b22358dc6f5a643fe72fb0ea934654db3c8984d498640c342b7bdbcd6c52a3929f4bb3bf3cee557c308602e09c42c60cfecc7d366d64851f35e97508e96683ba
|
7
|
+
data.tar.gz: efd71547be719c70dfdb883a42bacbbebf702a5791ed977c17ae2b7a61f630b70f974920ac2e1402a23a7b447cd0530ee088f25ac6299c9504ef6be6d0feee42
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
|
4
4
|
[![Build Status](https://travis-ci.org/kyamaguchi/kindle_manager.svg?branch=master)](https://travis-ci.org/kyamaguchi/kindle_manager)
|
5
5
|
|
6
|
-
Scrape information of kindle books from amazon site
|
6
|
+
Scrape information of kindle books & highlights from amazon site
|
7
7
|
|
8
8
|
##### Fetch Kindle Books information
|
9
9
|
|
@@ -48,11 +48,13 @@ And `Dotenv.load` or `gem 'dotenv-rails'` may be required when you use this in y
|
|
48
48
|
|
49
49
|
### Run
|
50
50
|
|
51
|
+
#### Kindle books list
|
52
|
+
|
51
53
|
In console
|
52
54
|
|
53
|
-
```
|
55
|
+
```ruby
|
54
56
|
require 'kindle_manager'
|
55
|
-
client = KindleManager::Client.new(
|
57
|
+
client = KindleManager::Client.new(verbose: true, limit: 1000)
|
56
58
|
client.fetch_kindle_list
|
57
59
|
|
58
60
|
books = client.load_kindle_books
|
@@ -63,14 +65,66 @@ client.quit
|
|
63
65
|
Once `fetch_kindle_list` succeeds, you can load books information of downloaded pages anytime.
|
64
66
|
(You don't need to fetch pages with launching browser every time.)
|
65
67
|
|
66
|
-
```
|
68
|
+
```ruby
|
67
69
|
client = KindleManager::Client.new
|
68
70
|
books = client.load_kindle_books
|
69
71
|
```
|
70
72
|
|
71
|
-
|
73
|
+
Example of data
|
72
74
|
|
73
|
-
|
75
|
+
```ruby
|
76
|
+
console> pp books.first.to_hash
|
77
|
+
{"asin"=>"B0026OR2TU",
|
78
|
+
"title"=>
|
79
|
+
"Rails Cookbook: Recipes for Rapid Web Development with Ruby (Cookbooks (O'Reilly))",
|
80
|
+
"tag"=>"Sample",
|
81
|
+
"author"=>"Rob Orsini",
|
82
|
+
"date"=>Fri, 17 Mar 2017,
|
83
|
+
"collection_count"=>0}
|
84
|
+
```
|
85
|
+
|
86
|
+
#### Kindle highlights and notes
|
87
|
+
|
88
|
+
In console
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
require 'kindle_manager'
|
92
|
+
client = KindleManager::Client.new(verbose: true, limit: 10)
|
93
|
+
client.fetch_kindle_highlights
|
94
|
+
|
95
|
+
books = client.load_kindle_highlights
|
96
|
+
```
|
97
|
+
|
98
|
+
Example of data
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
console> pp books.first.to_hash
|
102
|
+
{"asin"=>"B004YW6M6G",
|
103
|
+
"title"=>
|
104
|
+
"Design Patterns in Ruby (Adobe Reader) (Addison-Wesley Professional Ruby Series)",
|
105
|
+
"author"=>"Russ Olsen",
|
106
|
+
"last_annotated_on"=>Wed, 21 Jun 2017,
|
107
|
+
"highlights_count"=>8,
|
108
|
+
"notes_count"=>7,
|
109
|
+
"highlights_and_notes"=>
|
110
|
+
[{"location"=>350,
|
111
|
+
"highlight"=>
|
112
|
+
"Design Patterns: Elements of Reusable Object-Oriented Software,",
|
113
|
+
"color"=>"orange",
|
114
|
+
"note"=>""},
|
115
|
+
{"location"=>351,
|
116
|
+
"highlight"=>"\"Gang of Four book\" (GoF)",
|
117
|
+
"color"=>"yellow",
|
118
|
+
"note"=>""},
|
119
|
+
{"location"=>356, "highlight"=>nil, "color"=>nil, "note"=>"note foo"},
|
120
|
+
...
|
121
|
+
{"location"=>385,
|
122
|
+
"highlight"=>nil,
|
123
|
+
"color"=>nil,
|
124
|
+
"note"=>"object oriented"}]}
|
125
|
+
```
|
126
|
+
|
127
|
+
#### Options
|
74
128
|
|
75
129
|
Limit fetching with number of fetched books: `client = KindleManager::Client.new(limit: 100)`
|
76
130
|
|
@@ -86,6 +140,12 @@ Firefox: `driver: :firefox`
|
|
86
140
|
|
87
141
|
Login and password: `login: 'xxx', password: 'yyy'`
|
88
142
|
|
143
|
+
Output debug log: `debug: true`
|
144
|
+
|
145
|
+
## TODO
|
146
|
+
|
147
|
+
- Limit the number of fetching books by date
|
148
|
+
|
89
149
|
## Applications
|
90
150
|
|
91
151
|
Applications using this gem
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class BaseAdapter
|
3
|
+
include AmazonAuth::CommonExtension
|
4
|
+
|
5
|
+
attr_accessor :store, :session, :options
|
6
|
+
|
7
|
+
def initialize(options)
|
8
|
+
@options = options
|
9
|
+
@session = options.fetch(:session, nil)
|
10
|
+
extend(AmazonAuth::SessionExtension)
|
11
|
+
|
12
|
+
@store = KindleManager::FileStore.new(options.merge(session: @session))
|
13
|
+
log "Directory for downloaded pages is #{store.base_dir}"
|
14
|
+
end
|
15
|
+
|
16
|
+
def limit
|
17
|
+
options.fetch(:limit, nil)
|
18
|
+
end
|
19
|
+
|
20
|
+
def max_scroll_attempts
|
21
|
+
options.fetch(:max_scroll_attempts, 20)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class BooksAdapter < BaseAdapter
|
3
|
+
def fetch
|
4
|
+
go_to_kindle_management_page
|
5
|
+
begin
|
6
|
+
load_next_kindle_list
|
7
|
+
rescue => e
|
8
|
+
puts "[ERROR] #{e}"
|
9
|
+
puts e.backtrace
|
10
|
+
puts
|
11
|
+
puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def go_to_kindle_management_page
|
16
|
+
log "Visiting kindle management page"
|
17
|
+
wait_for_selector('#shopAllLinks', wait_time: 5)
|
18
|
+
3.times do
|
19
|
+
link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
|
20
|
+
session.visit link
|
21
|
+
wait_for_selector('.navHeader_myx')
|
22
|
+
if session.first('.navHeader_myx')
|
23
|
+
log "Page found '#{session.first('.navHeader_myx').text}'"
|
24
|
+
break
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def load_next_kindle_list
|
30
|
+
wait_for_selector('.contentCount_myx')
|
31
|
+
current_loop = 0
|
32
|
+
while current_loop <= max_scroll_attempts
|
33
|
+
if limit && limit < number_of_fetched_books
|
34
|
+
break
|
35
|
+
elsif has_more_button?
|
36
|
+
snapshot_page
|
37
|
+
current_loop = 0
|
38
|
+
|
39
|
+
log "Clicking 'Show More'"
|
40
|
+
session.execute_script "window.scrollBy(0,-800)"
|
41
|
+
show_more_button.click
|
42
|
+
sleep 1
|
43
|
+
raise('Clicking of more button may have failed') if has_more_button?
|
44
|
+
else
|
45
|
+
log "Loading books with scrolling #{current_loop+1}"
|
46
|
+
session.execute_script "window.scrollBy(0,10000)"
|
47
|
+
end
|
48
|
+
sleep fetching_interval
|
49
|
+
current_loop += 1
|
50
|
+
end
|
51
|
+
log "Stopped loading"
|
52
|
+
snapshot_page
|
53
|
+
end
|
54
|
+
|
55
|
+
def load
|
56
|
+
books = []
|
57
|
+
store.list_html_files.each do |file|
|
58
|
+
parser = KindleManager::BooksParser.new(file)
|
59
|
+
books += parser.parse
|
60
|
+
end
|
61
|
+
books.uniq(&:asin)
|
62
|
+
end
|
63
|
+
|
64
|
+
def has_more_button?
|
65
|
+
!!show_more_button
|
66
|
+
end
|
67
|
+
|
68
|
+
def show_more_button
|
69
|
+
session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def number_of_fetched_books
|
73
|
+
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
|
74
|
+
wait_for_selector('.contentCount_myx')
|
75
|
+
text = doc.css('.contentCount_myx').text
|
76
|
+
m = text.match(re)
|
77
|
+
return m[2].to_i if m.present?
|
78
|
+
raise("Couldn't get the number of fetched books [#{text}]")
|
79
|
+
end
|
80
|
+
|
81
|
+
def loading?
|
82
|
+
session.first('.myx-popover-loading-wrapper').present?
|
83
|
+
end
|
84
|
+
|
85
|
+
def snapshot_page
|
86
|
+
if (text = doc.css('.contentCount_myx').try!(:text)).present?
|
87
|
+
log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
|
88
|
+
end
|
89
|
+
store.record_page
|
90
|
+
log "Saving page"
|
91
|
+
end
|
92
|
+
|
93
|
+
def fetching_interval
|
94
|
+
@options.fetch(:fetching_interval, 3)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class HighlightsAdapter < BaseAdapter
|
3
|
+
KINDLE_HIGHLIGHT_URL = "https://read.#{AmazonInfo.domain}/kp/notebook"
|
4
|
+
|
5
|
+
attr_accessor :library_ids, :loaded_library_ids, :failed_library_ids
|
6
|
+
|
7
|
+
def fetch
|
8
|
+
go_to_kindle_highlights_page
|
9
|
+
fetch_library_ids
|
10
|
+
fetch_kindle_highlights
|
11
|
+
end
|
12
|
+
|
13
|
+
def go_to_kindle_highlights_page
|
14
|
+
log "Visiting kindle highlights page"
|
15
|
+
session.visit KINDLE_HIGHLIGHT_URL
|
16
|
+
wait_for_selector('#library')
|
17
|
+
check_library_scroll
|
18
|
+
snapshot_page
|
19
|
+
end
|
20
|
+
|
21
|
+
def fetch_library_ids
|
22
|
+
last_scroll_top = check_library_scroll
|
23
|
+
20.times do
|
24
|
+
scroll_library_pane(last_scroll_top + 20000)
|
25
|
+
sleep(2)
|
26
|
+
new_scroll_top = check_library_scroll
|
27
|
+
break if limit && limit < doc.css('#library #kp-notebook-library > .a-row').size
|
28
|
+
break if last_scroll_top == new_scroll_top
|
29
|
+
last_scroll_top = new_scroll_top
|
30
|
+
end
|
31
|
+
snapshot_page
|
32
|
+
self.library_ids = doc.css('#library #kp-notebook-library > .a-row').map{|e| e['id'] }
|
33
|
+
self.loaded_library_ids ||= []
|
34
|
+
self.failed_library_ids ||= []
|
35
|
+
log "Number of library ids is #{library_ids.size}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def check_library_scroll
|
39
|
+
scroll_top = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop")
|
40
|
+
scroll_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollHeight")
|
41
|
+
offset_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).offsetHeight")
|
42
|
+
log "Scroll top:#{scroll_top} height:#{scroll_height} offset_height:#{offset_height}"
|
43
|
+
scroll_top
|
44
|
+
end
|
45
|
+
|
46
|
+
def scroll_library_pane(target_scroll_top)
|
47
|
+
session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop = #{target_scroll_top}")
|
48
|
+
end
|
49
|
+
|
50
|
+
def fetch_kindle_highlights
|
51
|
+
library_ids.each_with_index do |library_id,i|
|
52
|
+
break if limit && limit < i+1
|
53
|
+
next if loaded_library_ids.include?(library_id)
|
54
|
+
fetch_book_with_highlights(library_id)
|
55
|
+
end
|
56
|
+
report_failed_ids
|
57
|
+
snapshot_page
|
58
|
+
end
|
59
|
+
|
60
|
+
def fetch_book_with_highlights(library_id)
|
61
|
+
log "Fetching highlights for the book #{library_id}"
|
62
|
+
session.first("##{library_id}").click
|
63
|
+
wait_for_selector('#annotations .kp-notebook-annotation-container', wait_time: 10)
|
64
|
+
title = doc.css('#annotations .kp-notebook-annotation-container h3.kp-notebook-metadata').try!(:text)
|
65
|
+
highlights_count, notes_count = fetch_highlights_and_notes
|
66
|
+
snapshot_page("Saving page for [#{title}] (#{library_id}) highlights:#{highlights_count} notes:#{notes_count}")
|
67
|
+
if title.present?
|
68
|
+
self.loaded_library_ids << library_id
|
69
|
+
else
|
70
|
+
self.failed_library_ids << library_id
|
71
|
+
log "[ERROR] Failed to load #{library_id} or this book doesn't have any highlights and notes"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def fetch_highlights_and_notes
|
76
|
+
highlights_count = notes_count = nil
|
77
|
+
10.times do
|
78
|
+
sleep(1)
|
79
|
+
highlights_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-highlights-count').try!(:text)
|
80
|
+
notes_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-notes-count').try!(:text)
|
81
|
+
break if highlights_count != '--' && notes_count != '--'
|
82
|
+
end
|
83
|
+
[highlights_count, notes_count]
|
84
|
+
end
|
85
|
+
|
86
|
+
def report_failed_ids
|
87
|
+
log("May have failed with #{failed_library_ids.inspect}. Retry with client.adapter.session.first('#B000000000').click") if failed_library_ids.size > 0
|
88
|
+
end
|
89
|
+
|
90
|
+
def load
|
91
|
+
books = []
|
92
|
+
store.list_html_files.each do |file|
|
93
|
+
parser = KindleManager::HighlightsParser.new(file)
|
94
|
+
books += parser.parse
|
95
|
+
end
|
96
|
+
books.reject(&:invalid?).uniq(&:asin)
|
97
|
+
end
|
98
|
+
|
99
|
+
def snapshot_page(message = nil)
|
100
|
+
store.record_page
|
101
|
+
log(message.presence || "Saving page")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -2,129 +2,50 @@ module KindleManager
|
|
2
2
|
class Client
|
3
3
|
include AmazonAuth::CommonExtension
|
4
4
|
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :adapter
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
|
-
@limit = options.fetch(:limit, nil)
|
9
|
-
@max_scroll_attempts = options.fetch(:max_scroll_attempts, 20)
|
10
8
|
@options = options
|
11
9
|
@client = AmazonAuth::Client.new(@options)
|
12
10
|
extend(AmazonAuth::SessionExtension)
|
13
11
|
end
|
14
12
|
|
15
13
|
def session
|
16
|
-
@
|
14
|
+
@_session ||= @client.session
|
17
15
|
end
|
18
16
|
|
19
|
-
def
|
20
|
-
@
|
21
|
-
end
|
22
|
-
|
23
|
-
def setup_file_store
|
24
|
-
store.session = session
|
25
|
-
log "Directory for downloaded pages is #{store.base_dir}"
|
17
|
+
def sign_in
|
18
|
+
@client.sign_in
|
26
19
|
end
|
27
20
|
|
28
21
|
def fetch_kindle_list
|
29
22
|
sign_in
|
30
|
-
|
31
|
-
|
32
|
-
begin
|
33
|
-
load_next_kindle_list
|
34
|
-
rescue => e
|
35
|
-
puts "[ERROR] #{e}"
|
36
|
-
puts e.backtrace
|
37
|
-
puts
|
38
|
-
puts "Retry manually -> load_next_kindle_list or session etc."
|
39
|
-
end
|
23
|
+
set_adapter(:books, @options.merge(session: session))
|
24
|
+
adapter.fetch
|
40
25
|
end
|
41
26
|
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
books += parser.book_list
|
47
|
-
end
|
48
|
-
books.uniq(&:asin)
|
49
|
-
end
|
50
|
-
|
51
|
-
def sign_in
|
52
|
-
@client.sign_in
|
27
|
+
def fetch_kindle_highlights
|
28
|
+
sign_in
|
29
|
+
set_adapter(:highlights, @options.merge(session: session))
|
30
|
+
adapter.fetch
|
53
31
|
end
|
54
32
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
3.times do
|
59
|
-
link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
|
60
|
-
session.visit link
|
61
|
-
wait_for_selector('.navHeader_myx')
|
62
|
-
if session.first('.navHeader_myx')
|
63
|
-
log "Page found '#{session.first('.navHeader_myx').text}'"
|
64
|
-
break
|
65
|
-
end
|
66
|
-
end
|
33
|
+
def load_kindle_books
|
34
|
+
set_adapter(:books, @options.except(:create))
|
35
|
+
adapter.load
|
67
36
|
end
|
68
37
|
|
69
|
-
def
|
70
|
-
|
71
|
-
|
72
|
-
while @current_loop <= @max_scroll_attempts
|
73
|
-
if @limit && @limit < number_of_fetched_books
|
74
|
-
break
|
75
|
-
elsif has_more_button?
|
76
|
-
snapshot_page
|
77
|
-
@current_loop = 0
|
78
|
-
|
79
|
-
log "Clicking 'Show More'"
|
80
|
-
session.execute_script "window.scrollBy(0,-800)"
|
81
|
-
show_more_button.click
|
82
|
-
sleep 1
|
83
|
-
raise('Clicking of more button may have failed') if has_more_button?
|
84
|
-
else
|
85
|
-
log "Loading books with scrolling #{@current_loop+1}"
|
86
|
-
session.execute_script "window.scrollBy(0,10000)"
|
87
|
-
end
|
88
|
-
sleep fetching_interval
|
89
|
-
@current_loop += 1
|
90
|
-
end
|
91
|
-
log "Stopped loading"
|
92
|
-
snapshot_page
|
38
|
+
def load_kindle_highlights
|
39
|
+
set_adapter(:highlights, @options.except(:create))
|
40
|
+
adapter.load
|
93
41
|
end
|
94
42
|
|
95
43
|
def quit
|
96
44
|
session.driver.quit
|
97
45
|
end
|
98
46
|
|
99
|
-
def
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
def show_more_button
|
104
|
-
session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
|
105
|
-
end
|
106
|
-
|
107
|
-
def number_of_fetched_books
|
108
|
-
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
|
109
|
-
wait_for_selector('.contentCount_myx')
|
110
|
-
text = session.first('.contentCount_myx').text
|
111
|
-
m = text.match(re)
|
112
|
-
return m[2].to_i if m.present?
|
113
|
-
raise("Couldn't get the number of fetched books [#{text}]")
|
114
|
-
end
|
115
|
-
|
116
|
-
def loading?
|
117
|
-
session.first('.myx-popover-loading-wrapper').present?
|
118
|
-
end
|
119
|
-
|
120
|
-
def snapshot_page
|
121
|
-
log "Current page [#{session.first('.contentCount_myx').text}]" if session.first('.contentCount_myx')
|
122
|
-
store.record_page
|
123
|
-
log "Saving page"
|
124
|
-
end
|
125
|
-
|
126
|
-
def fetching_interval
|
127
|
-
@options.fetch(:fetching_interval, 3)
|
47
|
+
def set_adapter(type, options)
|
48
|
+
@adapter = "KindleManager::#{type.to_s.camelize}Adapter".constantize.new(options.merge(sub_dir: type))
|
128
49
|
end
|
129
50
|
end
|
130
51
|
end
|
@@ -3,6 +3,7 @@ module KindleManager
|
|
3
3
|
attr_accessor :dir_name, :session
|
4
4
|
|
5
5
|
def initialize(options = {})
|
6
|
+
@sub_dir = options.fetch(:sub_dir, 'books').to_s
|
6
7
|
@dir_name = options.fetch(:dir_name) do
|
7
8
|
tmp_dir_name = options[:create] ? nil : find_latest_dir_name
|
8
9
|
tmp_dir_name.presence || Time.current.strftime("%Y%m%d%H%M%S")
|
@@ -10,12 +11,28 @@ module KindleManager
|
|
10
11
|
@session = options.fetch(:session, nil)
|
11
12
|
end
|
12
13
|
|
14
|
+
def downloads_dir
|
15
|
+
'downloads'
|
16
|
+
end
|
17
|
+
|
18
|
+
def root_dir
|
19
|
+
File.join(downloads_dir, @sub_dir)
|
20
|
+
end
|
21
|
+
|
13
22
|
def base_dir
|
14
|
-
File.join(
|
23
|
+
File.join(root_dir, @dir_name)
|
15
24
|
end
|
16
25
|
|
17
|
-
def
|
18
|
-
|
26
|
+
def list_work_dirs
|
27
|
+
Dir["#{root_dir}/*"].select{|f| File.directory? f }
|
28
|
+
end
|
29
|
+
|
30
|
+
def find_latest_dir_name
|
31
|
+
list_work_dirs.sort.last.to_s.split('/').last
|
32
|
+
end
|
33
|
+
|
34
|
+
def list_html_files(dir = nil)
|
35
|
+
Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
|
19
36
|
end
|
20
37
|
|
21
38
|
def html_path(time)
|
@@ -32,26 +49,6 @@ module KindleManager
|
|
32
49
|
@session.save_screenshot(image_path(time))
|
33
50
|
end
|
34
51
|
|
35
|
-
def self.list_download_dirs
|
36
|
-
Dir["#{downloads_dir}/*"].select{|f| File.directory? f }
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.list_html_files(dir = nil)
|
40
|
-
if dir
|
41
|
-
Dir[File.join(downloads_dir, dir,'*.html')].select{|f| File.file? f }
|
42
|
-
else
|
43
|
-
Dir["#{downloads_dir}/*/*.html"].select{|f| File.file? f }
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def list_html_files
|
48
|
-
self.class.list_html_files(@dir_name)
|
49
|
-
end
|
50
|
-
|
51
|
-
def find_latest_dir_name
|
52
|
-
self.class.list_download_dirs.sort.last.to_s.split('/').last
|
53
|
-
end
|
54
|
-
|
55
52
|
private
|
56
53
|
|
57
54
|
def build_filepath(time, ext)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module KindleManager
|
2
|
-
class
|
2
|
+
class BooksParser < BaseParser
|
3
3
|
class BookRow
|
4
|
+
include KindleManager::Parsers::Common
|
5
|
+
|
4
6
|
def initialize(node)
|
5
7
|
@node = node
|
6
8
|
end
|
@@ -26,15 +28,7 @@ module KindleManager
|
|
26
28
|
end
|
27
29
|
|
28
30
|
def date
|
29
|
-
@_date ||=
|
30
|
-
date_text = @node.css("div[id^='date']").text
|
31
|
-
begin
|
32
|
-
Date.parse(date_text)
|
33
|
-
rescue ArgumentError => e
|
34
|
-
m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
|
35
|
-
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
36
|
-
end
|
37
|
-
end
|
31
|
+
@_date ||= parse_date(@node.css("div[id^='date']").text)
|
38
32
|
end
|
39
33
|
|
40
34
|
def collection_count
|
@@ -50,20 +44,8 @@ module KindleManager
|
|
50
44
|
end
|
51
45
|
end
|
52
46
|
|
53
|
-
def
|
54
|
-
@
|
55
|
-
end
|
56
|
-
|
57
|
-
def book_list
|
58
|
-
@book_list ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
|
59
|
-
end
|
60
|
-
|
61
|
-
def doc
|
62
|
-
@doc ||= Nokogiri::HTML(body)
|
63
|
-
end
|
64
|
-
|
65
|
-
def body
|
66
|
-
@body ||= File.read(@filepath)
|
47
|
+
def parse
|
48
|
+
@_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
|
67
49
|
end
|
68
50
|
end
|
69
51
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module KindleManager
|
2
|
+
module Parsers
|
3
|
+
module Common
|
4
|
+
|
5
|
+
def parse_date(date_text)
|
6
|
+
begin
|
7
|
+
Date.parse(date_text)
|
8
|
+
rescue ArgumentError => e
|
9
|
+
m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
|
10
|
+
m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
|
11
|
+
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class HighlightsParser < BaseParser
|
3
|
+
class BookWithNote
|
4
|
+
include KindleManager::Parsers::Common
|
5
|
+
|
6
|
+
def initialize(node)
|
7
|
+
@node = node
|
8
|
+
end
|
9
|
+
|
10
|
+
def inspect
|
11
|
+
"#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
|
12
|
+
end
|
13
|
+
|
14
|
+
def asin
|
15
|
+
@_asin ||= @node.css('#kp-notebook-annotations-asin').first['value']
|
16
|
+
end
|
17
|
+
|
18
|
+
def title
|
19
|
+
@_title ||= @node.css('h3.kp-notebook-metadata').text
|
20
|
+
end
|
21
|
+
|
22
|
+
def author
|
23
|
+
@_author ||= @node.css('h1.kp-notebook-metadata').first.text
|
24
|
+
end
|
25
|
+
|
26
|
+
def last_annotated_on
|
27
|
+
@_last_annotated_on ||= parse_date(@node.css('#kp-notebook-annotated-date').text)
|
28
|
+
end
|
29
|
+
|
30
|
+
def highlights_count
|
31
|
+
@_highlights_count ||= @node.css('.kp-notebook-highlight').size
|
32
|
+
end
|
33
|
+
|
34
|
+
def notes_count
|
35
|
+
@_notes_count ||= @node.css('.kp-notebook-note').reject{|e| e['class'] =~ /aok-hidden/ }.size
|
36
|
+
end
|
37
|
+
|
38
|
+
def highlights_and_notes
|
39
|
+
@_highlights_and_notes ||= begin
|
40
|
+
# Excluding the first element which has book info
|
41
|
+
@node.css('.a-spacing-base')[1..-1].map do |node|
|
42
|
+
location = node.css('#kp-annotation-location').first['value'].to_i
|
43
|
+
highlight_node = node.css('.kp-notebook-highlight').first
|
44
|
+
highlight = highlight_node && highlight_node.css('#highlight').first.text
|
45
|
+
color = highlight_node && highlight_node['class'].split.find{|v| v =~ /kp-notebook-highlight-/ }.split('-').last
|
46
|
+
note = node.css('#note').first.text
|
47
|
+
{'location' => location, 'highlight' => highlight, 'color' => color, 'note' => note}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def highlights
|
53
|
+
highlights_and_notes.reject{|e| e['highlight'].blank? }
|
54
|
+
end
|
55
|
+
|
56
|
+
def notes
|
57
|
+
highlights_and_notes.reject{|e| e['note'].blank? }
|
58
|
+
end
|
59
|
+
|
60
|
+
# This can be used to verify the count of highlights and notes
|
61
|
+
def count_summary
|
62
|
+
@_count_summary ||= begin
|
63
|
+
text = @node.css('h1.kp-notebook-metadata').last.text.strip
|
64
|
+
a, b = text.split('|').map{|text| m = text.match(/\d+/); m.nil? ? nil : m[0].to_i }
|
65
|
+
{'text' => text, 'highlights_count' => a, 'notes_count' => b}
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def to_hash
|
70
|
+
hash = {}
|
71
|
+
%w[asin title author last_annotated_on highlights_count notes_count highlights_and_notes].each do |f|
|
72
|
+
hash[f] = send(f)
|
73
|
+
end
|
74
|
+
hash
|
75
|
+
end
|
76
|
+
|
77
|
+
def invalid?
|
78
|
+
!!(asin.blank? || count_summary['text'] =~ /--/)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def parse
|
83
|
+
@_parsed ||= begin
|
84
|
+
result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
|
85
|
+
puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
|
86
|
+
puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
|
87
|
+
result
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/kindle_manager.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
require 'amazon_auth'
|
2
2
|
require "kindle_manager/version"
|
3
|
+
require "kindle_manager/adapters/base_adapter"
|
4
|
+
require "kindle_manager/adapters/books_adapter"
|
5
|
+
require "kindle_manager/adapters/highlights_adapter"
|
3
6
|
require "kindle_manager/client"
|
4
7
|
require "kindle_manager/file_store"
|
5
|
-
require "kindle_manager/list_parser"
|
8
|
+
require "kindle_manager/parsers/common"
|
9
|
+
require "kindle_manager/parsers/base_parser"
|
10
|
+
require "kindle_manager/parsers/books_parser"
|
11
|
+
require "kindle_manager/parsers/highlights_parser"
|
6
12
|
|
7
13
|
module KindleManager
|
8
14
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kindle_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -98,9 +98,15 @@ files:
|
|
98
98
|
- bin/setup
|
99
99
|
- kindle_manager.gemspec
|
100
100
|
- lib/kindle_manager.rb
|
101
|
+
- lib/kindle_manager/adapters/base_adapter.rb
|
102
|
+
- lib/kindle_manager/adapters/books_adapter.rb
|
103
|
+
- lib/kindle_manager/adapters/highlights_adapter.rb
|
101
104
|
- lib/kindle_manager/client.rb
|
102
105
|
- lib/kindle_manager/file_store.rb
|
103
|
-
- lib/kindle_manager/list_parser.rb
|
106
|
+
- lib/kindle_manager/parsers/base_parser.rb
|
107
|
+
- lib/kindle_manager/parsers/books_parser.rb
|
108
|
+
- lib/kindle_manager/parsers/common.rb
|
109
|
+
- lib/kindle_manager/parsers/highlights_parser.rb
|
104
110
|
- lib/kindle_manager/version.rb
|
105
111
|
homepage: https://github.com/kyamaguchi/kindle_manager
|
106
112
|
licenses:
|