kindle_manager 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 336757c923bee51dec981128a5ebef870411051e
4
- data.tar.gz: 60ab2b249a6d074f5d00744b5d63bf088558b137
3
+ metadata.gz: 289fa1e86d32bf07025ccba4417a62e78d7a68fe
4
+ data.tar.gz: 1de00446cd5f21fb37b9dfdf236a60ccc6c51065
5
5
  SHA512:
6
- metadata.gz: 964510aebb3a9e57d07f48fa987f6abbcc99f304a4f73b0232448b97b11c2950dcb466caae7ff0afec56839ca63307300854281f95cb5097449879582887724f
7
- data.tar.gz: dfdf5c3f0c0a40b0bd05a9d81cac2c4be45f65b10547c1a7ae424368539fe2320cf8851371e4ab9020812e4e8f5f53199bc395da96bb29b6e42341de736b8da2
6
+ metadata.gz: b22358dc6f5a643fe72fb0ea934654db3c8984d498640c342b7bdbcd6c52a3929f4bb3bf3cee557c308602e09c42c60cfecc7d366d64851f35e97508e96683ba
7
+ data.tar.gz: efd71547be719c70dfdb883a42bacbbebf702a5791ed977c17ae2b7a61f630b70f974920ac2e1402a23a7b447cd0530ee088f25ac6299c9504ef6be6d0feee42
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
4
4
  [![Build Status](https://travis-ci.org/kyamaguchi/kindle_manager.svg?branch=master)](https://travis-ci.org/kyamaguchi/kindle_manager)
5
5
 
6
- Scrape information of kindle books from amazon site
6
+ Scrape information of kindle books & highlights from amazon site
7
7
 
8
8
  ##### Fetch Kindle Books information
9
9
 
@@ -48,11 +48,13 @@ And `Dotenv.load` or `gem 'dotenv-rails'` may be required when you use this in y
48
48
 
49
49
  ### Run
50
50
 
51
+ #### Kindle books list
52
+
51
53
  In console
52
54
 
53
- ```
55
+ ```ruby
54
56
  require 'kindle_manager'
55
- client = KindleManager::Client.new(debug: true, limit: 1000)
57
+ client = KindleManager::Client.new(verbose: true, limit: 1000)
56
58
  client.fetch_kindle_list
57
59
 
58
60
  books = client.load_kindle_books
@@ -63,14 +65,66 @@ client.quit
63
65
  Once `fetch_kindle_list` succeeds, you can load books information of downloaded pages anytime.
64
66
  (You don't need to fetch pages with launching browser every time.)
65
67
 
66
- ```
68
+ ```ruby
67
69
  client = KindleManager::Client.new
68
70
  books = client.load_kindle_books
69
71
  ```
70
72
 
71
- #### Options
73
+ Example of data
72
74
 
73
- Debug print: `client = KindleManager::Client.new(debug: true)`
75
+ ```ruby
76
+ console> pp books.first.to_hash
77
+ {"asin"=>"B0026OR2TU",
78
+ "title"=>
79
+ "Rails Cookbook: Recipes for Rapid Web Development with Ruby (Cookbooks (O'Reilly))",
80
+ "tag"=>"Sample",
81
+ "author"=>"Rob Orsini",
82
+ "date"=>Fri, 17 Mar 2017,
83
+ "collection_count"=>0}
84
+ ```
85
+
86
+ #### Kindle highlights and notes
87
+
88
+ In console
89
+
90
+ ```ruby
91
+ require 'kindle_manager'
92
+ client = KindleManager::Client.new(verbose: true, limit: 10)
93
+ client.fetch_kindle_highlights
94
+
95
+ books = client.load_kindle_highlights
96
+ ```
97
+
98
+ Example of data
99
+
100
+ ```ruby
101
+ console> pp books.first.to_hash
102
+ {"asin"=>"B004YW6M6G",
103
+ "title"=>
104
+ "Design Patterns in Ruby (Adobe Reader) (Addison-Wesley Professional Ruby Series)",
105
+ "author"=>"Russ Olsen",
106
+ "last_annotated_on"=>Wed, 21 Jun 2017,
107
+ "highlights_count"=>8,
108
+ "notes_count"=>7,
109
+ "highlights_and_notes"=>
110
+ [{"location"=>350,
111
+ "highlight"=>
112
+ "Design Patterns: Elements of Reusable Object-Oriented Software,",
113
+ "color"=>"orange",
114
+ "note"=>""},
115
+ {"location"=>351,
116
+ "highlight"=>"\"Gang of Four book\" (GoF)",
117
+ "color"=>"yellow",
118
+ "note"=>""},
119
+ {"location"=>356, "highlight"=>nil, "color"=>nil, "note"=>"note foo"},
120
+ ...
121
+ {"location"=>385,
122
+ "highlight"=>nil,
123
+ "color"=>nil,
124
+ "note"=>"object oriented"}]}
125
+ ```
126
+
127
+ #### Options
74
128
 
75
129
  Limit fetching with number of fetched books: `client = KindleManager::Client.new(limit: 100)`
76
130
 
@@ -86,6 +140,12 @@ Firefox: `driver: :firefox`
86
140
 
87
141
  Login and password: `login: 'xxx', password: 'yyy'`
88
142
 
143
+ Output debug log: `debug: true`
144
+
145
+ ## TODO
146
+
147
+ - Limit the number of books fetched by date
148
+
89
149
  ## Applications
90
150
 
91
151
  Applications using this gem
@@ -0,0 +1,24 @@
1
+ module KindleManager
2
+ class BaseAdapter
3
+ include AmazonAuth::CommonExtension
4
+
5
+ attr_accessor :store, :session, :options
6
+
7
+ def initialize(options)
8
+ @options = options
9
+ @session = options.fetch(:session, nil)
10
+ extend(AmazonAuth::SessionExtension)
11
+
12
+ @store = KindleManager::FileStore.new(options.merge(session: @session))
13
+ log "Directory for downloaded pages is #{store.base_dir}"
14
+ end
15
+
16
+ def limit
17
+ options.fetch(:limit, nil)
18
+ end
19
+
20
+ def max_scroll_attempts
21
+ options.fetch(:max_scroll_attempts, 20)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,97 @@
1
+ module KindleManager
2
+ class BooksAdapter < BaseAdapter
3
+ def fetch
4
+ go_to_kindle_management_page
5
+ begin
6
+ load_next_kindle_list
7
+ rescue => e
8
+ puts "[ERROR] #{e}"
9
+ puts e.backtrace
10
+ puts
11
+ puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
12
+ end
13
+ end
14
+
15
+ def go_to_kindle_management_page
16
+ log "Visiting kindle management page"
17
+ wait_for_selector('#shopAllLinks', wait_time: 5)
18
+ 3.times do
19
+ link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
20
+ session.visit link
21
+ wait_for_selector('.navHeader_myx')
22
+ if session.first('.navHeader_myx')
23
+ log "Page found '#{session.first('.navHeader_myx').text}'"
24
+ break
25
+ end
26
+ end
27
+ end
28
+
29
+ def load_next_kindle_list
30
+ wait_for_selector('.contentCount_myx')
31
+ current_loop = 0
32
+ while current_loop <= max_scroll_attempts
33
+ if limit && limit < number_of_fetched_books
34
+ break
35
+ elsif has_more_button?
36
+ snapshot_page
37
+ current_loop = 0
38
+
39
+ log "Clicking 'Show More'"
40
+ session.execute_script "window.scrollBy(0,-800)"
41
+ show_more_button.click
42
+ sleep 1
43
+ raise('Clicking of more button may have failed') if has_more_button?
44
+ else
45
+ log "Loading books with scrolling #{current_loop+1}"
46
+ session.execute_script "window.scrollBy(0,10000)"
47
+ end
48
+ sleep fetching_interval
49
+ current_loop += 1
50
+ end
51
+ log "Stopped loading"
52
+ snapshot_page
53
+ end
54
+
55
+ def load
56
+ books = []
57
+ store.list_html_files.each do |file|
58
+ parser = KindleManager::BooksParser.new(file)
59
+ books += parser.parse
60
+ end
61
+ books.uniq(&:asin)
62
+ end
63
+
64
+ def has_more_button?
65
+ !!show_more_button
66
+ end
67
+
68
+ def show_more_button
69
+ session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
70
+ end
71
+
72
+ def number_of_fetched_books
73
+ re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
74
+ wait_for_selector('.contentCount_myx')
75
+ text = doc.css('.contentCount_myx').text
76
+ m = text.match(re)
77
+ return m[2].to_i if m.present?
78
+ raise("Couldn't get the number of fetched books [#{text}]")
79
+ end
80
+
81
+ def loading?
82
+ session.first('.myx-popover-loading-wrapper').present?
83
+ end
84
+
85
+ def snapshot_page
86
+ if (text = doc.css('.contentCount_myx').try!(:text)).present?
87
+ log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
88
+ end
89
+ store.record_page
90
+ log "Saving page"
91
+ end
92
+
93
+ def fetching_interval
94
+ @options.fetch(:fetching_interval, 3)
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,104 @@
1
+ module KindleManager
2
+ class HighlightsAdapter < BaseAdapter
3
+ KINDLE_HIGHLIGHT_URL = "https://read.#{AmazonInfo.domain}/kp/notebook"
4
+
5
+ attr_accessor :library_ids, :loaded_library_ids, :failed_library_ids
6
+
7
+ def fetch
8
+ go_to_kindle_highlights_page
9
+ fetch_library_ids
10
+ fetch_kindle_highlights
11
+ end
12
+
13
+ def go_to_kindle_highlights_page
14
+ log "Visiting kindle highlights page"
15
+ session.visit KINDLE_HIGHLIGHT_URL
16
+ wait_for_selector('#library')
17
+ check_library_scroll
18
+ snapshot_page
19
+ end
20
+
21
+ def fetch_library_ids
22
+ last_scroll_top = check_library_scroll
23
+ 20.times do
24
+ scroll_library_pane(last_scroll_top + 20000)
25
+ sleep(2)
26
+ new_scroll_top = check_library_scroll
27
+ break if limit && limit < doc.css('#library #kp-notebook-library > .a-row').size
28
+ break if last_scroll_top == new_scroll_top
29
+ last_scroll_top = new_scroll_top
30
+ end
31
+ snapshot_page
32
+ self.library_ids = doc.css('#library #kp-notebook-library > .a-row').map{|e| e['id'] }
33
+ self.loaded_library_ids ||= []
34
+ self.failed_library_ids ||= []
35
+ log "Number of library ids is #{library_ids.size}"
36
+ end
37
+
38
+ def check_library_scroll
39
+ scroll_top = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop")
40
+ scroll_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollHeight")
41
+ offset_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).offsetHeight")
42
+ log "Scroll top:#{scroll_top} height:#{scroll_height} offset_height:#{offset_height}"
43
+ scroll_top
44
+ end
45
+
46
+ def scroll_library_pane(target_scroll_top)
47
+ session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop = #{target_scroll_top}")
48
+ end
49
+
50
+ def fetch_kindle_highlights
51
+ library_ids.each_with_index do |library_id,i|
52
+ break if limit && limit < i+1
53
+ next if loaded_library_ids.include?(library_id)
54
+ fetch_book_with_highlights(library_id)
55
+ end
56
+ report_failed_ids
57
+ snapshot_page
58
+ end
59
+
60
+ def fetch_book_with_highlights(library_id)
61
+ log "Fetching highlights for the book #{library_id}"
62
+ session.first("##{library_id}").click
63
+ wait_for_selector('#annotations .kp-notebook-annotation-container', wait_time: 10)
64
+ title = doc.css('#annotations .kp-notebook-annotation-container h3.kp-notebook-metadata').try!(:text)
65
+ highlights_count, notes_count = fetch_highlights_and_notes
66
+ snapshot_page("Saving page for [#{title}] (#{library_id}) highlights:#{highlights_count} notes:#{notes_count}")
67
+ if title.present?
68
+ self.loaded_library_ids << library_id
69
+ else
70
+ self.failed_library_ids << library_id
71
+ log "[ERROR] Failed to load #{library_id} or this book doesn't have any highlights and notes"
72
+ end
73
+ end
74
+
75
+ def fetch_highlights_and_notes
76
+ highlights_count = notes_count = nil
77
+ 10.times do
78
+ sleep(1)
79
+ highlights_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-highlights-count').try!(:text)
80
+ notes_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-notes-count').try!(:text)
81
+ break if highlights_count != '--' && notes_count != '--'
82
+ end
83
+ [highlights_count, notes_count]
84
+ end
85
+
86
+ def report_failed_ids
87
+ log("May have failed with #{failed_library_ids.inspect}. Retry with client.adapter.session.first('#B000000000').click") if failed_library_ids.size > 0
88
+ end
89
+
90
+ def load
91
+ books = []
92
+ store.list_html_files.each do |file|
93
+ parser = KindleManager::HighlightsParser.new(file)
94
+ books += parser.parse
95
+ end
96
+ books.reject(&:invalid?).uniq(&:asin)
97
+ end
98
+
99
+ def snapshot_page(message = nil)
100
+ store.record_page
101
+ log(message.presence || "Saving page")
102
+ end
103
+ end
104
+ end
@@ -2,129 +2,50 @@ module KindleManager
2
2
  class Client
3
3
  include AmazonAuth::CommonExtension
4
4
 
5
- attr_accessor :session
5
+ attr_accessor :adapter
6
6
 
7
7
  def initialize(options = {})
8
- @limit = options.fetch(:limit, nil)
9
- @max_scroll_attempts = options.fetch(:max_scroll_attempts, 20)
10
8
  @options = options
11
9
  @client = AmazonAuth::Client.new(@options)
12
10
  extend(AmazonAuth::SessionExtension)
13
11
  end
14
12
 
15
13
  def session
16
- @session ||= @client.session
14
+ @_session ||= @client.session
17
15
  end
18
16
 
19
- def store
20
- @store ||= KindleManager::FileStore.new(@options)
21
- end
22
-
23
- def setup_file_store
24
- store.session = session
25
- log "Directory for downloaded pages is #{store.base_dir}"
17
+ def sign_in
18
+ @client.sign_in
26
19
  end
27
20
 
28
21
  def fetch_kindle_list
29
22
  sign_in
30
- setup_file_store
31
- go_to_kindle_management_page
32
- begin
33
- load_next_kindle_list
34
- rescue => e
35
- puts "[ERROR] #{e}"
36
- puts e.backtrace
37
- puts
38
- puts "Retry manually -> load_next_kindle_list or session etc."
39
- end
23
+ set_adapter(:books, @options.merge(session: session))
24
+ adapter.fetch
40
25
  end
41
26
 
42
- def load_kindle_books
43
- books = []
44
- store.list_html_files.each do |file|
45
- parser = KindleManager::ListParser.new(file)
46
- books += parser.book_list
47
- end
48
- books.uniq(&:asin)
49
- end
50
-
51
- def sign_in
52
- @client.sign_in
27
+ def fetch_kindle_highlights
28
+ sign_in
29
+ set_adapter(:highlights, @options.merge(session: session))
30
+ adapter.fetch
53
31
  end
54
32
 
55
- def go_to_kindle_management_page
56
- log "Visiting kindle management page"
57
- wait_for_selector('#shopAllLinks', wait_time: 5)
58
- 3.times do
59
- link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
60
- session.visit link
61
- wait_for_selector('.navHeader_myx')
62
- if session.first('.navHeader_myx')
63
- log "Page found '#{session.first('.navHeader_myx').text}'"
64
- break
65
- end
66
- end
33
+ def load_kindle_books
34
+ set_adapter(:books, @options.except(:create))
35
+ adapter.load
67
36
  end
68
37
 
69
- def load_next_kindle_list
70
- wait_for_selector('.contentCount_myx')
71
- @current_loop = 0
72
- while @current_loop <= @max_scroll_attempts
73
- if @limit && @limit < number_of_fetched_books
74
- break
75
- elsif has_more_button?
76
- snapshot_page
77
- @current_loop = 0
78
-
79
- log "Clicking 'Show More'"
80
- session.execute_script "window.scrollBy(0,-800)"
81
- show_more_button.click
82
- sleep 1
83
- raise('Clicking of more button may have failed') if has_more_button?
84
- else
85
- log "Loading books with scrolling #{@current_loop+1}"
86
- session.execute_script "window.scrollBy(0,10000)"
87
- end
88
- sleep fetching_interval
89
- @current_loop += 1
90
- end
91
- log "Stopped loading"
92
- snapshot_page
38
+ def load_kindle_highlights
39
+ set_adapter(:highlights, @options.except(:create))
40
+ adapter.load
93
41
  end
94
42
 
95
43
  def quit
96
44
  session.driver.quit
97
45
  end
98
46
 
99
- def has_more_button?
100
- !!show_more_button
101
- end
102
-
103
- def show_more_button
104
- session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
105
- end
106
-
107
- def number_of_fetched_books
108
- re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
109
- wait_for_selector('.contentCount_myx')
110
- text = session.first('.contentCount_myx').text
111
- m = text.match(re)
112
- return m[2].to_i if m.present?
113
- raise("Couldn't get the number of fetched books [#{text}]")
114
- end
115
-
116
- def loading?
117
- session.first('.myx-popover-loading-wrapper').present?
118
- end
119
-
120
- def snapshot_page
121
- log "Current page [#{session.first('.contentCount_myx').text}]" if session.first('.contentCount_myx')
122
- store.record_page
123
- log "Saving page"
124
- end
125
-
126
- def fetching_interval
127
- @options.fetch(:fetching_interval, 3)
47
+ def set_adapter(type, options)
48
+ @adapter = "KindleManager::#{type.to_s.camelize}Adapter".constantize.new(options.merge(sub_dir: type))
128
49
  end
129
50
  end
130
51
  end
@@ -3,6 +3,7 @@ module KindleManager
3
3
  attr_accessor :dir_name, :session
4
4
 
5
5
  def initialize(options = {})
6
+ @sub_dir = options.fetch(:sub_dir, 'books').to_s
6
7
  @dir_name = options.fetch(:dir_name) do
7
8
  tmp_dir_name = options[:create] ? nil : find_latest_dir_name
8
9
  tmp_dir_name.presence || Time.current.strftime("%Y%m%d%H%M%S")
@@ -10,12 +11,28 @@ module KindleManager
10
11
  @session = options.fetch(:session, nil)
11
12
  end
12
13
 
14
+ def downloads_dir
15
+ 'downloads'
16
+ end
17
+
18
+ def root_dir
19
+ File.join(downloads_dir, @sub_dir)
20
+ end
21
+
13
22
  def base_dir
14
- File.join(self.class.downloads_dir, @dir_name)
23
+ File.join(root_dir, @dir_name)
15
24
  end
16
25
 
17
- def self.downloads_dir
18
- 'downloads'
26
+ def list_work_dirs
27
+ Dir["#{root_dir}/*"].select{|f| File.directory? f }
28
+ end
29
+
30
+ def find_latest_dir_name
31
+ list_work_dirs.sort.last.to_s.split('/').last
32
+ end
33
+
34
+ def list_html_files(dir = nil)
35
+ Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
19
36
  end
20
37
 
21
38
  def html_path(time)
@@ -32,26 +49,6 @@ module KindleManager
32
49
  @session.save_screenshot(image_path(time))
33
50
  end
34
51
 
35
- def self.list_download_dirs
36
- Dir["#{downloads_dir}/*"].select{|f| File.directory? f }
37
- end
38
-
39
- def self.list_html_files(dir = nil)
40
- if dir
41
- Dir[File.join(downloads_dir, dir,'*.html')].select{|f| File.file? f }
42
- else
43
- Dir["#{downloads_dir}/*/*.html"].select{|f| File.file? f }
44
- end
45
- end
46
-
47
- def list_html_files
48
- self.class.list_html_files(@dir_name)
49
- end
50
-
51
- def find_latest_dir_name
52
- self.class.list_download_dirs.sort.last.to_s.split('/').last
53
- end
54
-
55
52
  private
56
53
 
57
54
  def build_filepath(time, ext)
@@ -0,0 +1,16 @@
1
+ module KindleManager
2
+ class BaseParser
3
+
4
+ def initialize(filepath, options = {})
5
+ @filepath = filepath
6
+ end
7
+
8
+ def doc
9
+ @doc ||= Nokogiri::HTML(body)
10
+ end
11
+
12
+ def body
13
+ @body ||= File.read(@filepath)
14
+ end
15
+ end
16
+ end
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
- class ListParser
2
+ class BooksParser < BaseParser
3
3
  class BookRow
4
+ include KindleManager::Parsers::Common
5
+
4
6
  def initialize(node)
5
7
  @node = node
6
8
  end
@@ -26,15 +28,7 @@ module KindleManager
26
28
  end
27
29
 
28
30
  def date
29
- @_date ||= begin
30
- date_text = @node.css("div[id^='date']").text
31
- begin
32
- Date.parse(date_text)
33
- rescue ArgumentError => e
34
- m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
35
- Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
36
- end
37
- end
31
+ @_date ||= parse_date(@node.css("div[id^='date']").text)
38
32
  end
39
33
 
40
34
  def collection_count
@@ -50,20 +44,8 @@ module KindleManager
50
44
  end
51
45
  end
52
46
 
53
- def initialize(filepath, options = {})
54
- @filepath = filepath
55
- end
56
-
57
- def book_list
58
- @book_list ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
59
- end
60
-
61
- def doc
62
- @doc ||= Nokogiri::HTML(body)
63
- end
64
-
65
- def body
66
- @body ||= File.read(@filepath)
47
+ def parse
48
+ @_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
67
49
  end
68
50
  end
69
51
  end
@@ -0,0 +1,16 @@
1
+ module KindleManager
2
+ module Parsers
3
+ module Common
4
+
5
+ def parse_date(date_text)
6
+ begin
7
+ Date.parse(date_text)
8
+ rescue ArgumentError => e
9
+ m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
10
+ m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
11
+ Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,91 @@
1
+ module KindleManager
2
+ class HighlightsParser < BaseParser
3
+ class BookWithNote
4
+ include KindleManager::Parsers::Common
5
+
6
+ def initialize(node)
7
+ @node = node
8
+ end
9
+
10
+ def inspect
11
+ "#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
12
+ end
13
+
14
+ def asin
15
+ @_asin ||= @node.css('#kp-notebook-annotations-asin').first['value']
16
+ end
17
+
18
+ def title
19
+ @_title ||= @node.css('h3.kp-notebook-metadata').text
20
+ end
21
+
22
+ def author
23
+ @_author ||= @node.css('h1.kp-notebook-metadata').first.text
24
+ end
25
+
26
+ def last_annotated_on
27
+ @_last_annotated_on ||= parse_date(@node.css('#kp-notebook-annotated-date').text)
28
+ end
29
+
30
+ def highlights_count
31
+ @_highlights_count ||= @node.css('.kp-notebook-highlight').size
32
+ end
33
+
34
+ def notes_count
35
+ @_notes_count ||= @node.css('.kp-notebook-note').reject{|e| e['class'] =~ /aok-hidden/ }.size
36
+ end
37
+
38
+ def highlights_and_notes
39
+ @_highlights_and_notes ||= begin
40
+ # Excluding the first element which has book info
41
+ @node.css('.a-spacing-base')[1..-1].map do |node|
42
+ location = node.css('#kp-annotation-location').first['value'].to_i
43
+ highlight_node = node.css('.kp-notebook-highlight').first
44
+ highlight = highlight_node && highlight_node.css('#highlight').first.text
45
+ color = highlight_node && highlight_node['class'].split.find{|v| v =~ /kp-notebook-highlight-/ }.split('-').last
46
+ note = node.css('#note').first.text
47
+ {'location' => location, 'highlight' => highlight, 'color' => color, 'note' => note}
48
+ end
49
+ end
50
+ end
51
+
52
+ def highlights
53
+ highlights_and_notes.reject{|e| e['highlight'].blank? }
54
+ end
55
+
56
+ def notes
57
+ highlights_and_notes.reject{|e| e['note'].blank? }
58
+ end
59
+
60
+ # This can be used to verify the count of highlights and notes
61
+ def count_summary
62
+ @_count_summary ||= begin
63
+ text = @node.css('h1.kp-notebook-metadata').last.text.strip
64
+ a, b = text.split('|').map{|text| m = text.match(/\d+/); m.nil? ? nil : m[0].to_i }
65
+ {'text' => text, 'highlights_count' => a, 'notes_count' => b}
66
+ end
67
+ end
68
+
69
+ def to_hash
70
+ hash = {}
71
+ %w[asin title author last_annotated_on highlights_count notes_count highlights_and_notes].each do |f|
72
+ hash[f] = send(f)
73
+ end
74
+ hash
75
+ end
76
+
77
+ def invalid?
78
+ !!(asin.blank? || count_summary['text'] =~ /--/)
79
+ end
80
+ end
81
+
82
+ def parse
83
+ @_parsed ||= begin
84
+ result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
85
+ puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
86
+ puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
87
+ result
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,8 +1,14 @@
1
1
  require 'amazon_auth'
2
2
  require "kindle_manager/version"
3
+ require "kindle_manager/adapters/base_adapter"
4
+ require "kindle_manager/adapters/books_adapter"
5
+ require "kindle_manager/adapters/highlights_adapter"
3
6
  require "kindle_manager/client"
4
7
  require "kindle_manager/file_store"
5
- require "kindle_manager/list_parser"
8
+ require "kindle_manager/parsers/common"
9
+ require "kindle_manager/parsers/base_parser"
10
+ require "kindle_manager/parsers/books_parser"
11
+ require "kindle_manager/parsers/highlights_parser"
6
12
 
7
13
  module KindleManager
8
14
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-06-17 00:00:00.000000000 Z
11
+ date: 2017-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -98,9 +98,15 @@ files:
98
98
  - bin/setup
99
99
  - kindle_manager.gemspec
100
100
  - lib/kindle_manager.rb
101
+ - lib/kindle_manager/adapters/base_adapter.rb
102
+ - lib/kindle_manager/adapters/books_adapter.rb
103
+ - lib/kindle_manager/adapters/highlights_adapter.rb
101
104
  - lib/kindle_manager/client.rb
102
105
  - lib/kindle_manager/file_store.rb
103
- - lib/kindle_manager/list_parser.rb
106
+ - lib/kindle_manager/parsers/base_parser.rb
107
+ - lib/kindle_manager/parsers/books_parser.rb
108
+ - lib/kindle_manager/parsers/common.rb
109
+ - lib/kindle_manager/parsers/highlights_parser.rb
104
110
  - lib/kindle_manager/version.rb
105
111
  homepage: https://github.com/kyamaguchi/kindle_manager
106
112
  licenses: