kindle_manager 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -6
- data/lib/kindle_manager/adapters/base_adapter.rb +24 -0
- data/lib/kindle_manager/adapters/books_adapter.rb +97 -0
- data/lib/kindle_manager/adapters/highlights_adapter.rb +104 -0
- data/lib/kindle_manager/client.rb +18 -97
- data/lib/kindle_manager/file_store.rb +20 -23
- data/lib/kindle_manager/parsers/base_parser.rb +16 -0
- data/lib/kindle_manager/{list_parser.rb → parsers/books_parser.rb} +6 -24
- data/lib/kindle_manager/parsers/common.rb +16 -0
- data/lib/kindle_manager/parsers/highlights_parser.rb +91 -0
- data/lib/kindle_manager/version.rb +1 -1
- data/lib/kindle_manager.rb +7 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 289fa1e86d32bf07025ccba4417a62e78d7a68fe
|
4
|
+
data.tar.gz: 1de00446cd5f21fb37b9dfdf236a60ccc6c51065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b22358dc6f5a643fe72fb0ea934654db3c8984d498640c342b7bdbcd6c52a3929f4bb3bf3cee557c308602e09c42c60cfecc7d366d64851f35e97508e96683ba
|
7
|
+
data.tar.gz: efd71547be719c70dfdb883a42bacbbebf702a5791ed977c17ae2b7a61f630b70f974920ac2e1402a23a7b447cd0530ee088f25ac6299c9504ef6be6d0feee42
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
|
4
4
|
[![Build Status](https://travis-ci.org/kyamaguchi/kindle_manager.svg?branch=master)](https://travis-ci.org/kyamaguchi/kindle_manager)
|
5
5
|
|
6
|
-
Scrape information of kindle books from amazon site
|
6
|
+
Scrape information of kindle books & highlights from amazon site
|
7
7
|
|
8
8
|
##### Fetch Kindle Books information
|
9
9
|
|
@@ -48,11 +48,13 @@ And `Dotenv.load` or `gem 'dotenv-rails'` may be required when you use this in y
|
|
48
48
|
|
49
49
|
### Run
|
50
50
|
|
51
|
+
#### Kindle books list
|
52
|
+
|
51
53
|
In console
|
52
54
|
|
53
|
-
```
|
55
|
+
```ruby
|
54
56
|
require 'kindle_manager'
|
55
|
-
client = KindleManager::Client.new(
|
57
|
+
client = KindleManager::Client.new(verbose: true, limit: 1000)
|
56
58
|
client.fetch_kindle_list
|
57
59
|
|
58
60
|
books = client.load_kindle_books
|
@@ -63,14 +65,66 @@ client.quit
|
|
63
65
|
Once `fetch_kindle_list` succeeds, you can load books information of downloaded pages anytime.
|
64
66
|
(You don't need to fetch pages with launching browser every time.)
|
65
67
|
|
66
|
-
```
|
68
|
+
```ruby
|
67
69
|
client = KindleManager::Client.new
|
68
70
|
books = client.load_kindle_books
|
69
71
|
```
|
70
72
|
|
71
|
-
|
73
|
+
Example of data
|
72
74
|
|
73
|
-
|
75
|
+
```ruby
|
76
|
+
console> pp books.first.to_hash
|
77
|
+
{"asin"=>"B0026OR2TU",
|
78
|
+
"title"=>
|
79
|
+
"Rails Cookbook: Recipes for Rapid Web Development with Ruby (Cookbooks (O'Reilly))",
|
80
|
+
"tag"=>"Sample",
|
81
|
+
"author"=>"Rob Orsini",
|
82
|
+
"date"=>Fri, 17 Mar 2017,
|
83
|
+
"collection_count"=>0}
|
84
|
+
```
|
85
|
+
|
86
|
+
#### Kindle highlights and notes
|
87
|
+
|
88
|
+
In console
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
require 'kindle_manager'
|
92
|
+
client = KindleManager::Client.new(verbose: true, limit: 10)
|
93
|
+
client.fetch_kindle_highlights
|
94
|
+
|
95
|
+
books = client.load_kindle_highlights
|
96
|
+
```
|
97
|
+
|
98
|
+
Example of data
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
console> pp books.first.to_hash
|
102
|
+
{"asin"=>"B004YW6M6G",
|
103
|
+
"title"=>
|
104
|
+
"Design Patterns in Ruby (Adobe Reader) (Addison-Wesley Professional Ruby Series)",
|
105
|
+
"author"=>"Russ Olsen",
|
106
|
+
"last_annotated_on"=>Wed, 21 Jun 2017,
|
107
|
+
"highlights_count"=>8,
|
108
|
+
"notes_count"=>7,
|
109
|
+
"highlights_and_notes"=>
|
110
|
+
[{"location"=>350,
|
111
|
+
"highlight"=>
|
112
|
+
"Design Patterns: Elements of Reusable Object-Oriented Software,",
|
113
|
+
"color"=>"orange",
|
114
|
+
"note"=>""},
|
115
|
+
{"location"=>351,
|
116
|
+
"highlight"=>"\"Gang of Four book\" (GoF)",
|
117
|
+
"color"=>"yellow",
|
118
|
+
"note"=>""},
|
119
|
+
{"location"=>356, "highlight"=>nil, "color"=>nil, "note"=>"note foo"},
|
120
|
+
...
|
121
|
+
{"location"=>385,
|
122
|
+
"highlight"=>nil,
|
123
|
+
"color"=>nil,
|
124
|
+
"note"=>"object oriented"}]}
|
125
|
+
```
|
126
|
+
|
127
|
+
#### Options
|
74
128
|
|
75
129
|
Limit fetching with number of fetched books: `client = KindleManager::Client.new(limit: 100)`
|
76
130
|
|
@@ -86,6 +140,12 @@ Firefox: `driver: :firefox`
|
|
86
140
|
|
87
141
|
Login and password: `login: 'xxx', password: 'yyy'`
|
88
142
|
|
143
|
+
Output debug log: `debug: true`
|
144
|
+
|
145
|
+
## TODO
|
146
|
+
|
147
|
+
- Limit the number of fetching books by date
|
148
|
+
|
89
149
|
## Applications
|
90
150
|
|
91
151
|
Applications using this gem
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class BaseAdapter
|
3
|
+
include AmazonAuth::CommonExtension
|
4
|
+
|
5
|
+
attr_accessor :store, :session, :options
|
6
|
+
|
7
|
+
def initialize(options)
|
8
|
+
@options = options
|
9
|
+
@session = options.fetch(:session, nil)
|
10
|
+
extend(AmazonAuth::SessionExtension)
|
11
|
+
|
12
|
+
@store = KindleManager::FileStore.new(options.merge(session: @session))
|
13
|
+
log "Directory for downloaded pages is #{store.base_dir}"
|
14
|
+
end
|
15
|
+
|
16
|
+
def limit
|
17
|
+
options.fetch(:limit, nil)
|
18
|
+
end
|
19
|
+
|
20
|
+
def max_scroll_attempts
|
21
|
+
options.fetch(:max_scroll_attempts, 20)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class BooksAdapter < BaseAdapter
|
3
|
+
def fetch
|
4
|
+
go_to_kindle_management_page
|
5
|
+
begin
|
6
|
+
load_next_kindle_list
|
7
|
+
rescue => e
|
8
|
+
puts "[ERROR] #{e}"
|
9
|
+
puts e.backtrace
|
10
|
+
puts
|
11
|
+
puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def go_to_kindle_management_page
|
16
|
+
log "Visiting kindle management page"
|
17
|
+
wait_for_selector('#shopAllLinks', wait_time: 5)
|
18
|
+
3.times do
|
19
|
+
link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
|
20
|
+
session.visit link
|
21
|
+
wait_for_selector('.navHeader_myx')
|
22
|
+
if session.first('.navHeader_myx')
|
23
|
+
log "Page found '#{session.first('.navHeader_myx').text}'"
|
24
|
+
break
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def load_next_kindle_list
|
30
|
+
wait_for_selector('.contentCount_myx')
|
31
|
+
current_loop = 0
|
32
|
+
while current_loop <= max_scroll_attempts
|
33
|
+
if limit && limit < number_of_fetched_books
|
34
|
+
break
|
35
|
+
elsif has_more_button?
|
36
|
+
snapshot_page
|
37
|
+
current_loop = 0
|
38
|
+
|
39
|
+
log "Clicking 'Show More'"
|
40
|
+
session.execute_script "window.scrollBy(0,-800)"
|
41
|
+
show_more_button.click
|
42
|
+
sleep 1
|
43
|
+
raise('Clicking of more button may have failed') if has_more_button?
|
44
|
+
else
|
45
|
+
log "Loading books with scrolling #{current_loop+1}"
|
46
|
+
session.execute_script "window.scrollBy(0,10000)"
|
47
|
+
end
|
48
|
+
sleep fetching_interval
|
49
|
+
current_loop += 1
|
50
|
+
end
|
51
|
+
log "Stopped loading"
|
52
|
+
snapshot_page
|
53
|
+
end
|
54
|
+
|
55
|
+
def load
|
56
|
+
books = []
|
57
|
+
store.list_html_files.each do |file|
|
58
|
+
parser = KindleManager::BooksParser.new(file)
|
59
|
+
books += parser.parse
|
60
|
+
end
|
61
|
+
books.uniq(&:asin)
|
62
|
+
end
|
63
|
+
|
64
|
+
def has_more_button?
|
65
|
+
!!show_more_button
|
66
|
+
end
|
67
|
+
|
68
|
+
def show_more_button
|
69
|
+
session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def number_of_fetched_books
|
73
|
+
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
|
74
|
+
wait_for_selector('.contentCount_myx')
|
75
|
+
text = doc.css('.contentCount_myx').text
|
76
|
+
m = text.match(re)
|
77
|
+
return m[2].to_i if m.present?
|
78
|
+
raise("Couldn't get the number of fetched books [#{text}]")
|
79
|
+
end
|
80
|
+
|
81
|
+
def loading?
|
82
|
+
session.first('.myx-popover-loading-wrapper').present?
|
83
|
+
end
|
84
|
+
|
85
|
+
def snapshot_page
|
86
|
+
if (text = doc.css('.contentCount_myx').try!(:text)).present?
|
87
|
+
log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
|
88
|
+
end
|
89
|
+
store.record_page
|
90
|
+
log "Saving page"
|
91
|
+
end
|
92
|
+
|
93
|
+
def fetching_interval
|
94
|
+
@options.fetch(:fetching_interval, 3)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class HighlightsAdapter < BaseAdapter
|
3
|
+
KINDLE_HIGHLIGHT_URL = "https://read.#{AmazonInfo.domain}/kp/notebook"
|
4
|
+
|
5
|
+
attr_accessor :library_ids, :loaded_library_ids, :failed_library_ids
|
6
|
+
|
7
|
+
def fetch
|
8
|
+
go_to_kindle_highlights_page
|
9
|
+
fetch_library_ids
|
10
|
+
fetch_kindle_highlights
|
11
|
+
end
|
12
|
+
|
13
|
+
def go_to_kindle_highlights_page
|
14
|
+
log "Visiting kindle highlights page"
|
15
|
+
session.visit KINDLE_HIGHLIGHT_URL
|
16
|
+
wait_for_selector('#library')
|
17
|
+
check_library_scroll
|
18
|
+
snapshot_page
|
19
|
+
end
|
20
|
+
|
21
|
+
def fetch_library_ids
|
22
|
+
last_scroll_top = check_library_scroll
|
23
|
+
20.times do
|
24
|
+
scroll_library_pane(last_scroll_top + 20000)
|
25
|
+
sleep(2)
|
26
|
+
new_scroll_top = check_library_scroll
|
27
|
+
break if limit && limit < doc.css('#library #kp-notebook-library > .a-row').size
|
28
|
+
break if last_scroll_top == new_scroll_top
|
29
|
+
last_scroll_top = new_scroll_top
|
30
|
+
end
|
31
|
+
snapshot_page
|
32
|
+
self.library_ids = doc.css('#library #kp-notebook-library > .a-row').map{|e| e['id'] }
|
33
|
+
self.loaded_library_ids ||= []
|
34
|
+
self.failed_library_ids ||= []
|
35
|
+
log "Number of library ids is #{library_ids.size}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def check_library_scroll
|
39
|
+
scroll_top = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop")
|
40
|
+
scroll_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollHeight")
|
41
|
+
offset_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).offsetHeight")
|
42
|
+
log "Scroll top:#{scroll_top} height:#{scroll_height} offset_height:#{offset_height}"
|
43
|
+
scroll_top
|
44
|
+
end
|
45
|
+
|
46
|
+
def scroll_library_pane(target_scroll_top)
|
47
|
+
session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop = #{target_scroll_top}")
|
48
|
+
end
|
49
|
+
|
50
|
+
def fetch_kindle_highlights
|
51
|
+
library_ids.each_with_index do |library_id,i|
|
52
|
+
break if limit && limit < i+1
|
53
|
+
next if loaded_library_ids.include?(library_id)
|
54
|
+
fetch_book_with_highlights(library_id)
|
55
|
+
end
|
56
|
+
report_failed_ids
|
57
|
+
snapshot_page
|
58
|
+
end
|
59
|
+
|
60
|
+
def fetch_book_with_highlights(library_id)
|
61
|
+
log "Fetching highlights for the book #{library_id}"
|
62
|
+
session.first("##{library_id}").click
|
63
|
+
wait_for_selector('#annotations .kp-notebook-annotation-container', wait_time: 10)
|
64
|
+
title = doc.css('#annotations .kp-notebook-annotation-container h3.kp-notebook-metadata').try!(:text)
|
65
|
+
highlights_count, notes_count = fetch_highlights_and_notes
|
66
|
+
snapshot_page("Saving page for [#{title}] (#{library_id}) highlights:#{highlights_count} notes:#{notes_count}")
|
67
|
+
if title.present?
|
68
|
+
self.loaded_library_ids << library_id
|
69
|
+
else
|
70
|
+
self.failed_library_ids << library_id
|
71
|
+
log "[ERROR] Failed to load #{library_id} or this book doesn't have any highlights and notes"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def fetch_highlights_and_notes
|
76
|
+
highlights_count = notes_count = nil
|
77
|
+
10.times do
|
78
|
+
sleep(1)
|
79
|
+
highlights_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-highlights-count').try!(:text)
|
80
|
+
notes_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-notes-count').try!(:text)
|
81
|
+
break if highlights_count != '--' && notes_count != '--'
|
82
|
+
end
|
83
|
+
[highlights_count, notes_count]
|
84
|
+
end
|
85
|
+
|
86
|
+
def report_failed_ids
|
87
|
+
log("May have failed with #{failed_library_ids.inspect}. Retry with client.adapter.session.first('#B000000000').click") if failed_library_ids.size > 0
|
88
|
+
end
|
89
|
+
|
90
|
+
def load
|
91
|
+
books = []
|
92
|
+
store.list_html_files.each do |file|
|
93
|
+
parser = KindleManager::HighlightsParser.new(file)
|
94
|
+
books += parser.parse
|
95
|
+
end
|
96
|
+
books.reject(&:invalid?).uniq(&:asin)
|
97
|
+
end
|
98
|
+
|
99
|
+
def snapshot_page(message = nil)
|
100
|
+
store.record_page
|
101
|
+
log(message.presence || "Saving page")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -2,129 +2,50 @@ module KindleManager
|
|
2
2
|
class Client
|
3
3
|
include AmazonAuth::CommonExtension
|
4
4
|
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :adapter
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
|
-
@limit = options.fetch(:limit, nil)
|
9
|
-
@max_scroll_attempts = options.fetch(:max_scroll_attempts, 20)
|
10
8
|
@options = options
|
11
9
|
@client = AmazonAuth::Client.new(@options)
|
12
10
|
extend(AmazonAuth::SessionExtension)
|
13
11
|
end
|
14
12
|
|
15
13
|
def session
|
16
|
-
@
|
14
|
+
@_session ||= @client.session
|
17
15
|
end
|
18
16
|
|
19
|
-
def
|
20
|
-
@
|
21
|
-
end
|
22
|
-
|
23
|
-
def setup_file_store
|
24
|
-
store.session = session
|
25
|
-
log "Directory for downloaded pages is #{store.base_dir}"
|
17
|
+
def sign_in
|
18
|
+
@client.sign_in
|
26
19
|
end
|
27
20
|
|
28
21
|
def fetch_kindle_list
|
29
22
|
sign_in
|
30
|
-
|
31
|
-
|
32
|
-
begin
|
33
|
-
load_next_kindle_list
|
34
|
-
rescue => e
|
35
|
-
puts "[ERROR] #{e}"
|
36
|
-
puts e.backtrace
|
37
|
-
puts
|
38
|
-
puts "Retry manually -> load_next_kindle_list or session etc."
|
39
|
-
end
|
23
|
+
set_adapter(:books, @options.merge(session: session))
|
24
|
+
adapter.fetch
|
40
25
|
end
|
41
26
|
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
books += parser.book_list
|
47
|
-
end
|
48
|
-
books.uniq(&:asin)
|
49
|
-
end
|
50
|
-
|
51
|
-
def sign_in
|
52
|
-
@client.sign_in
|
27
|
+
def fetch_kindle_highlights
|
28
|
+
sign_in
|
29
|
+
set_adapter(:highlights, @options.merge(session: session))
|
30
|
+
adapter.fetch
|
53
31
|
end
|
54
32
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
3.times do
|
59
|
-
link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
|
60
|
-
session.visit link
|
61
|
-
wait_for_selector('.navHeader_myx')
|
62
|
-
if session.first('.navHeader_myx')
|
63
|
-
log "Page found '#{session.first('.navHeader_myx').text}'"
|
64
|
-
break
|
65
|
-
end
|
66
|
-
end
|
33
|
+
def load_kindle_books
|
34
|
+
set_adapter(:books, @options.except(:create))
|
35
|
+
adapter.load
|
67
36
|
end
|
68
37
|
|
69
|
-
def
|
70
|
-
|
71
|
-
|
72
|
-
while @current_loop <= @max_scroll_attempts
|
73
|
-
if @limit && @limit < number_of_fetched_books
|
74
|
-
break
|
75
|
-
elsif has_more_button?
|
76
|
-
snapshot_page
|
77
|
-
@current_loop = 0
|
78
|
-
|
79
|
-
log "Clicking 'Show More'"
|
80
|
-
session.execute_script "window.scrollBy(0,-800)"
|
81
|
-
show_more_button.click
|
82
|
-
sleep 1
|
83
|
-
raise('Clicking of more button may have failed') if has_more_button?
|
84
|
-
else
|
85
|
-
log "Loading books with scrolling #{@current_loop+1}"
|
86
|
-
session.execute_script "window.scrollBy(0,10000)"
|
87
|
-
end
|
88
|
-
sleep fetching_interval
|
89
|
-
@current_loop += 1
|
90
|
-
end
|
91
|
-
log "Stopped loading"
|
92
|
-
snapshot_page
|
38
|
+
def load_kindle_highlights
|
39
|
+
set_adapter(:highlights, @options.except(:create))
|
40
|
+
adapter.load
|
93
41
|
end
|
94
42
|
|
95
43
|
def quit
|
96
44
|
session.driver.quit
|
97
45
|
end
|
98
46
|
|
99
|
-
def
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
def show_more_button
|
104
|
-
session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
|
105
|
-
end
|
106
|
-
|
107
|
-
def number_of_fetched_books
|
108
|
-
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
|
109
|
-
wait_for_selector('.contentCount_myx')
|
110
|
-
text = session.first('.contentCount_myx').text
|
111
|
-
m = text.match(re)
|
112
|
-
return m[2].to_i if m.present?
|
113
|
-
raise("Couldn't get the number of fetched books [#{text}]")
|
114
|
-
end
|
115
|
-
|
116
|
-
def loading?
|
117
|
-
session.first('.myx-popover-loading-wrapper').present?
|
118
|
-
end
|
119
|
-
|
120
|
-
def snapshot_page
|
121
|
-
log "Current page [#{session.first('.contentCount_myx').text}]" if session.first('.contentCount_myx')
|
122
|
-
store.record_page
|
123
|
-
log "Saving page"
|
124
|
-
end
|
125
|
-
|
126
|
-
def fetching_interval
|
127
|
-
@options.fetch(:fetching_interval, 3)
|
47
|
+
def set_adapter(type, options)
|
48
|
+
@adapter = "KindleManager::#{type.to_s.camelize}Adapter".constantize.new(options.merge(sub_dir: type))
|
128
49
|
end
|
129
50
|
end
|
130
51
|
end
|
@@ -3,6 +3,7 @@ module KindleManager
|
|
3
3
|
attr_accessor :dir_name, :session
|
4
4
|
|
5
5
|
def initialize(options = {})
|
6
|
+
@sub_dir = options.fetch(:sub_dir, 'books').to_s
|
6
7
|
@dir_name = options.fetch(:dir_name) do
|
7
8
|
tmp_dir_name = options[:create] ? nil : find_latest_dir_name
|
8
9
|
tmp_dir_name.presence || Time.current.strftime("%Y%m%d%H%M%S")
|
@@ -10,12 +11,28 @@ module KindleManager
|
|
10
11
|
@session = options.fetch(:session, nil)
|
11
12
|
end
|
12
13
|
|
14
|
+
def downloads_dir
|
15
|
+
'downloads'
|
16
|
+
end
|
17
|
+
|
18
|
+
def root_dir
|
19
|
+
File.join(downloads_dir, @sub_dir)
|
20
|
+
end
|
21
|
+
|
13
22
|
def base_dir
|
14
|
-
File.join(
|
23
|
+
File.join(root_dir, @dir_name)
|
15
24
|
end
|
16
25
|
|
17
|
-
def
|
18
|
-
|
26
|
+
def list_work_dirs
|
27
|
+
Dir["#{root_dir}/*"].select{|f| File.directory? f }
|
28
|
+
end
|
29
|
+
|
30
|
+
def find_latest_dir_name
|
31
|
+
list_work_dirs.sort.last.to_s.split('/').last
|
32
|
+
end
|
33
|
+
|
34
|
+
def list_html_files(dir = nil)
|
35
|
+
Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
|
19
36
|
end
|
20
37
|
|
21
38
|
def html_path(time)
|
@@ -32,26 +49,6 @@ module KindleManager
|
|
32
49
|
@session.save_screenshot(image_path(time))
|
33
50
|
end
|
34
51
|
|
35
|
-
def self.list_download_dirs
|
36
|
-
Dir["#{downloads_dir}/*"].select{|f| File.directory? f }
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.list_html_files(dir = nil)
|
40
|
-
if dir
|
41
|
-
Dir[File.join(downloads_dir, dir,'*.html')].select{|f| File.file? f }
|
42
|
-
else
|
43
|
-
Dir["#{downloads_dir}/*/*.html"].select{|f| File.file? f }
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def list_html_files
|
48
|
-
self.class.list_html_files(@dir_name)
|
49
|
-
end
|
50
|
-
|
51
|
-
def find_latest_dir_name
|
52
|
-
self.class.list_download_dirs.sort.last.to_s.split('/').last
|
53
|
-
end
|
54
|
-
|
55
52
|
private
|
56
53
|
|
57
54
|
def build_filepath(time, ext)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module KindleManager
|
2
|
-
class
|
2
|
+
class BooksParser < BaseParser
|
3
3
|
class BookRow
|
4
|
+
include KindleManager::Parsers::Common
|
5
|
+
|
4
6
|
def initialize(node)
|
5
7
|
@node = node
|
6
8
|
end
|
@@ -26,15 +28,7 @@ module KindleManager
|
|
26
28
|
end
|
27
29
|
|
28
30
|
def date
|
29
|
-
@_date ||=
|
30
|
-
date_text = @node.css("div[id^='date']").text
|
31
|
-
begin
|
32
|
-
Date.parse(date_text)
|
33
|
-
rescue ArgumentError => e
|
34
|
-
m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
|
35
|
-
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
36
|
-
end
|
37
|
-
end
|
31
|
+
@_date ||= parse_date(@node.css("div[id^='date']").text)
|
38
32
|
end
|
39
33
|
|
40
34
|
def collection_count
|
@@ -50,20 +44,8 @@ module KindleManager
|
|
50
44
|
end
|
51
45
|
end
|
52
46
|
|
53
|
-
def
|
54
|
-
@
|
55
|
-
end
|
56
|
-
|
57
|
-
def book_list
|
58
|
-
@book_list ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
|
59
|
-
end
|
60
|
-
|
61
|
-
def doc
|
62
|
-
@doc ||= Nokogiri::HTML(body)
|
63
|
-
end
|
64
|
-
|
65
|
-
def body
|
66
|
-
@body ||= File.read(@filepath)
|
47
|
+
def parse
|
48
|
+
@_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
|
67
49
|
end
|
68
50
|
end
|
69
51
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module KindleManager
|
2
|
+
module Parsers
|
3
|
+
module Common
|
4
|
+
|
5
|
+
def parse_date(date_text)
|
6
|
+
begin
|
7
|
+
Date.parse(date_text)
|
8
|
+
rescue ArgumentError => e
|
9
|
+
m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
|
10
|
+
m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
|
11
|
+
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module KindleManager
|
2
|
+
class HighlightsParser < BaseParser
|
3
|
+
class BookWithNote
|
4
|
+
include KindleManager::Parsers::Common
|
5
|
+
|
6
|
+
def initialize(node)
|
7
|
+
@node = node
|
8
|
+
end
|
9
|
+
|
10
|
+
def inspect
|
11
|
+
"#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
|
12
|
+
end
|
13
|
+
|
14
|
+
def asin
|
15
|
+
@_asin ||= @node.css('#kp-notebook-annotations-asin').first['value']
|
16
|
+
end
|
17
|
+
|
18
|
+
def title
|
19
|
+
@_title ||= @node.css('h3.kp-notebook-metadata').text
|
20
|
+
end
|
21
|
+
|
22
|
+
def author
|
23
|
+
@_author ||= @node.css('h1.kp-notebook-metadata').first.text
|
24
|
+
end
|
25
|
+
|
26
|
+
def last_annotated_on
|
27
|
+
@_last_annotated_on ||= parse_date(@node.css('#kp-notebook-annotated-date').text)
|
28
|
+
end
|
29
|
+
|
30
|
+
def highlights_count
|
31
|
+
@_highlights_count ||= @node.css('.kp-notebook-highlight').size
|
32
|
+
end
|
33
|
+
|
34
|
+
def notes_count
|
35
|
+
@_notes_count ||= @node.css('.kp-notebook-note').reject{|e| e['class'] =~ /aok-hidden/ }.size
|
36
|
+
end
|
37
|
+
|
38
|
+
def highlights_and_notes
|
39
|
+
@_highlights_and_notes ||= begin
|
40
|
+
# Excluding the first element which has book info
|
41
|
+
@node.css('.a-spacing-base')[1..-1].map do |node|
|
42
|
+
location = node.css('#kp-annotation-location').first['value'].to_i
|
43
|
+
highlight_node = node.css('.kp-notebook-highlight').first
|
44
|
+
highlight = highlight_node && highlight_node.css('#highlight').first.text
|
45
|
+
color = highlight_node && highlight_node['class'].split.find{|v| v =~ /kp-notebook-highlight-/ }.split('-').last
|
46
|
+
note = node.css('#note').first.text
|
47
|
+
{'location' => location, 'highlight' => highlight, 'color' => color, 'note' => note}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def highlights
|
53
|
+
highlights_and_notes.reject{|e| e['highlight'].blank? }
|
54
|
+
end
|
55
|
+
|
56
|
+
def notes
|
57
|
+
highlights_and_notes.reject{|e| e['note'].blank? }
|
58
|
+
end
|
59
|
+
|
60
|
+
# This can be used to verify the count of highlights and notes
|
61
|
+
def count_summary
|
62
|
+
@_count_summary ||= begin
|
63
|
+
text = @node.css('h1.kp-notebook-metadata').last.text.strip
|
64
|
+
a, b = text.split('|').map{|text| m = text.match(/\d+/); m.nil? ? nil : m[0].to_i }
|
65
|
+
{'text' => text, 'highlights_count' => a, 'notes_count' => b}
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def to_hash
|
70
|
+
hash = {}
|
71
|
+
%w[asin title author last_annotated_on highlights_count notes_count highlights_and_notes].each do |f|
|
72
|
+
hash[f] = send(f)
|
73
|
+
end
|
74
|
+
hash
|
75
|
+
end
|
76
|
+
|
77
|
+
def invalid?
|
78
|
+
!!(asin.blank? || count_summary['text'] =~ /--/)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def parse
|
83
|
+
@_parsed ||= begin
|
84
|
+
result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
|
85
|
+
puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
|
86
|
+
puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
|
87
|
+
result
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/kindle_manager.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
require 'amazon_auth'
|
2
2
|
require "kindle_manager/version"
|
3
|
+
require "kindle_manager/adapters/base_adapter"
|
4
|
+
require "kindle_manager/adapters/books_adapter"
|
5
|
+
require "kindle_manager/adapters/highlights_adapter"
|
3
6
|
require "kindle_manager/client"
|
4
7
|
require "kindle_manager/file_store"
|
5
|
-
require "kindle_manager/list_parser"
|
8
|
+
require "kindle_manager/parsers/common"
|
9
|
+
require "kindle_manager/parsers/base_parser"
|
10
|
+
require "kindle_manager/parsers/books_parser"
|
11
|
+
require "kindle_manager/parsers/highlights_parser"
|
6
12
|
|
7
13
|
module KindleManager
|
8
14
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kindle_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -98,9 +98,15 @@ files:
|
|
98
98
|
- bin/setup
|
99
99
|
- kindle_manager.gemspec
|
100
100
|
- lib/kindle_manager.rb
|
101
|
+
- lib/kindle_manager/adapters/base_adapter.rb
|
102
|
+
- lib/kindle_manager/adapters/books_adapter.rb
|
103
|
+
- lib/kindle_manager/adapters/highlights_adapter.rb
|
101
104
|
- lib/kindle_manager/client.rb
|
102
105
|
- lib/kindle_manager/file_store.rb
|
103
|
-
- lib/kindle_manager/list_parser.rb
|
106
|
+
- lib/kindle_manager/parsers/base_parser.rb
|
107
|
+
- lib/kindle_manager/parsers/books_parser.rb
|
108
|
+
- lib/kindle_manager/parsers/common.rb
|
109
|
+
- lib/kindle_manager/parsers/highlights_parser.rb
|
104
110
|
- lib/kindle_manager/version.rb
|
105
111
|
homepage: https://github.com/kyamaguchi/kindle_manager
|
106
112
|
licenses:
|