kindle_manager 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 336757c923bee51dec981128a5ebef870411051e
4
- data.tar.gz: 60ab2b249a6d074f5d00744b5d63bf088558b137
3
+ metadata.gz: 289fa1e86d32bf07025ccba4417a62e78d7a68fe
4
+ data.tar.gz: 1de00446cd5f21fb37b9dfdf236a60ccc6c51065
5
5
  SHA512:
6
- metadata.gz: 964510aebb3a9e57d07f48fa987f6abbcc99f304a4f73b0232448b97b11c2950dcb466caae7ff0afec56839ca63307300854281f95cb5097449879582887724f
7
- data.tar.gz: dfdf5c3f0c0a40b0bd05a9d81cac2c4be45f65b10547c1a7ae424368539fe2320cf8851371e4ab9020812e4e8f5f53199bc395da96bb29b6e42341de736b8da2
6
+ metadata.gz: b22358dc6f5a643fe72fb0ea934654db3c8984d498640c342b7bdbcd6c52a3929f4bb3bf3cee557c308602e09c42c60cfecc7d366d64851f35e97508e96683ba
7
+ data.tar.gz: efd71547be719c70dfdb883a42bacbbebf702a5791ed977c17ae2b7a61f630b70f974920ac2e1402a23a7b447cd0530ee088f25ac6299c9504ef6be6d0feee42
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
4
4
  [![Build Status](https://travis-ci.org/kyamaguchi/kindle_manager.svg?branch=master)](https://travis-ci.org/kyamaguchi/kindle_manager)
5
5
 
6
- Scrape information of kindle books from amazon site
6
+ Scrape information about Kindle books & highlights from the Amazon site
7
7
 
8
8
  ##### Fetch Kindle Books information
9
9
 
@@ -48,11 +48,13 @@ And `Dotenv.load` or `gem 'dotenv-rails'` may be required when you use this in y
48
48
 
49
49
  ### Run
50
50
 
51
+ #### Kindle books list
52
+
51
53
  In console
52
54
 
53
- ```
55
+ ```ruby
54
56
  require 'kindle_manager'
55
- client = KindleManager::Client.new(debug: true, limit: 1000)
57
+ client = KindleManager::Client.new(verbose: true, limit: 1000)
56
58
  client.fetch_kindle_list
57
59
 
58
60
  books = client.load_kindle_books
@@ -63,14 +65,66 @@ client.quit
63
65
  Once `fetch_kindle_list` succeeds, you can load book information from the downloaded pages anytime.
64
66
  (You don't need to launch a browser and fetch the pages every time.)
65
67
 
66
- ```
68
+ ```ruby
67
69
  client = KindleManager::Client.new
68
70
  books = client.load_kindle_books
69
71
  ```
70
72
 
71
- #### Options
73
+ Example of data
72
74
 
73
- Debug print: `client = KindleManager::Client.new(debug: true)`
75
+ ```ruby
76
+ console> pp books.first.to_hash
77
+ {"asin"=>"B0026OR2TU",
78
+ "title"=>
79
+ "Rails Cookbook: Recipes for Rapid Web Development with Ruby (Cookbooks (O'Reilly))",
80
+ "tag"=>"Sample",
81
+ "author"=>"Rob Orsini",
82
+ "date"=>Fri, 17 Mar 2017,
83
+ "collection_count"=>0}
84
+ ```
85
+
86
+ #### Kindle highlights and notes
87
+
88
+ In console
89
+
90
+ ```ruby
91
+ require 'kindle_manager'
92
+ client = KindleManager::Client.new(verbose: true, limit: 10)
93
+ client.fetch_kindle_highlights
94
+
95
+ books = client.load_kindle_highlights
96
+ ```
97
+
98
+ Example of data
99
+
100
+ ```ruby
101
+ console> pp books.first.to_hash
102
+ {"asin"=>"B004YW6M6G",
103
+ "title"=>
104
+ "Design Patterns in Ruby (Adobe Reader) (Addison-Wesley Professional Ruby Series)",
105
+ "author"=>"Russ Olsen",
106
+ "last_annotated_on"=>Wed, 21 Jun 2017,
107
+ "highlights_count"=>8,
108
+ "notes_count"=>7,
109
+ "highlights_and_notes"=>
110
+ [{"location"=>350,
111
+ "highlight"=>
112
+ "Design Patterns: Elements of Reusable Object-Oriented Software,",
113
+ "color"=>"orange",
114
+ "note"=>""},
115
+ {"location"=>351,
116
+ "highlight"=>"\"Gang of Four book\" (GoF)",
117
+ "color"=>"yellow",
118
+ "note"=>""},
119
+ {"location"=>356, "highlight"=>nil, "color"=>nil, "note"=>"note foo"},
120
+ ...
121
+ {"location"=>385,
122
+ "highlight"=>nil,
123
+ "color"=>nil,
124
+ "note"=>"object oriented"}]}
125
+ ```
126
+
127
+ #### Options
74
128
 
75
129
  Limit fetching by the number of fetched books: `client = KindleManager::Client.new(limit: 100)`
76
130
 
@@ -86,6 +140,12 @@ Firefox: `driver: :firefox`
86
140
 
87
141
  Login and password: `login: 'xxx', password: 'yyy'`
88
142
 
143
+ Output debug log: `debug: true`
144
+
145
+ ## TODO
146
+
147
+ - Limit the number of fetched books by date
148
+
89
149
  ## Applications
90
150
 
91
151
  Applications using this gem
@@ -0,0 +1,24 @@
1
+ module KindleManager
2
+ class BaseAdapter
3
+ include AmazonAuth::CommonExtension
4
+
5
+ attr_accessor :store, :session, :options
6
+
7
+ def initialize(options)
8
+ @options = options
9
+ @session = options.fetch(:session, nil)
10
+ extend(AmazonAuth::SessionExtension)
11
+
12
+ @store = KindleManager::FileStore.new(options.merge(session: @session))
13
+ log "Directory for downloaded pages is #{store.base_dir}"
14
+ end
15
+
16
+ def limit
17
+ options.fetch(:limit, nil)
18
+ end
19
+
20
+ def max_scroll_attempts
21
+ options.fetch(:max_scroll_attempts, 20)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,97 @@
1
+ module KindleManager
2
+ class BooksAdapter < BaseAdapter
3
+ def fetch
4
+ go_to_kindle_management_page
5
+ begin
6
+ load_next_kindle_list
7
+ rescue => e
8
+ puts "[ERROR] #{e}"
9
+ puts e.backtrace
10
+ puts
11
+ puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
12
+ end
13
+ end
14
+
15
+ def go_to_kindle_management_page
16
+ log "Visiting kindle management page"
17
+ wait_for_selector('#shopAllLinks', wait_time: 5)
18
+ 3.times do
19
+ link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
20
+ session.visit link
21
+ wait_for_selector('.navHeader_myx')
22
+ if session.first('.navHeader_myx')
23
+ log "Page found '#{session.first('.navHeader_myx').text}'"
24
+ break
25
+ end
26
+ end
27
+ end
28
+
29
+ def load_next_kindle_list
30
+ wait_for_selector('.contentCount_myx')
31
+ current_loop = 0
32
+ while current_loop <= max_scroll_attempts
33
+ if limit && limit < number_of_fetched_books
34
+ break
35
+ elsif has_more_button?
36
+ snapshot_page
37
+ current_loop = 0
38
+
39
+ log "Clicking 'Show More'"
40
+ session.execute_script "window.scrollBy(0,-800)"
41
+ show_more_button.click
42
+ sleep 1
43
+ raise('Clicking of more button may have failed') if has_more_button?
44
+ else
45
+ log "Loading books with scrolling #{current_loop+1}"
46
+ session.execute_script "window.scrollBy(0,10000)"
47
+ end
48
+ sleep fetching_interval
49
+ current_loop += 1
50
+ end
51
+ log "Stopped loading"
52
+ snapshot_page
53
+ end
54
+
55
+ def load
56
+ books = []
57
+ store.list_html_files.each do |file|
58
+ parser = KindleManager::BooksParser.new(file)
59
+ books += parser.parse
60
+ end
61
+ books.uniq(&:asin)
62
+ end
63
+
64
+ def has_more_button?
65
+ !!show_more_button
66
+ end
67
+
68
+ def show_more_button
69
+ session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
70
+ end
71
+
72
+ def number_of_fetched_books
73
+ re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
74
+ wait_for_selector('.contentCount_myx')
75
+ text = doc.css('.contentCount_myx').text
76
+ m = text.match(re)
77
+ return m[2].to_i if m.present?
78
+ raise("Couldn't get the number of fetched books [#{text}]")
79
+ end
80
+
81
+ def loading?
82
+ session.first('.myx-popover-loading-wrapper').present?
83
+ end
84
+
85
+ def snapshot_page
86
+ if (text = doc.css('.contentCount_myx').try!(:text)).present?
87
+ log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
88
+ end
89
+ store.record_page
90
+ log "Saving page"
91
+ end
92
+
93
+ def fetching_interval
94
+ @options.fetch(:fetching_interval, 3)
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,104 @@
1
+ module KindleManager
2
+ class HighlightsAdapter < BaseAdapter
3
+ KINDLE_HIGHLIGHT_URL = "https://read.#{AmazonInfo.domain}/kp/notebook"
4
+
5
+ attr_accessor :library_ids, :loaded_library_ids, :failed_library_ids
6
+
7
+ def fetch
8
+ go_to_kindle_highlights_page
9
+ fetch_library_ids
10
+ fetch_kindle_highlights
11
+ end
12
+
13
+ def go_to_kindle_highlights_page
14
+ log "Visiting kindle highlights page"
15
+ session.visit KINDLE_HIGHLIGHT_URL
16
+ wait_for_selector('#library')
17
+ check_library_scroll
18
+ snapshot_page
19
+ end
20
+
21
+ def fetch_library_ids
22
+ last_scroll_top = check_library_scroll
23
+ 20.times do
24
+ scroll_library_pane(last_scroll_top + 20000)
25
+ sleep(2)
26
+ new_scroll_top = check_library_scroll
27
+ break if limit && limit < doc.css('#library #kp-notebook-library > .a-row').size
28
+ break if last_scroll_top == new_scroll_top
29
+ last_scroll_top = new_scroll_top
30
+ end
31
+ snapshot_page
32
+ self.library_ids = doc.css('#library #kp-notebook-library > .a-row').map{|e| e['id'] }
33
+ self.loaded_library_ids ||= []
34
+ self.failed_library_ids ||= []
35
+ log "Number of library ids is #{library_ids.size}"
36
+ end
37
+
38
+ def check_library_scroll
39
+ scroll_top = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop")
40
+ scroll_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollHeight")
41
+ offset_height = session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).offsetHeight")
42
+ log "Scroll top:#{scroll_top} height:#{scroll_height} offset_height:#{offset_height}"
43
+ scroll_top
44
+ end
45
+
46
+ def scroll_library_pane(target_scroll_top)
47
+ session.evaluate_script("$('#library .kp-notebook-scroller-addon').get(0).scrollTop = #{target_scroll_top}")
48
+ end
49
+
50
+ def fetch_kindle_highlights
51
+ library_ids.each_with_index do |library_id,i|
52
+ break if limit && limit < i+1
53
+ next if loaded_library_ids.include?(library_id)
54
+ fetch_book_with_highlights(library_id)
55
+ end
56
+ report_failed_ids
57
+ snapshot_page
58
+ end
59
+
60
+ def fetch_book_with_highlights(library_id)
61
+ log "Fetching highlights for the book #{library_id}"
62
+ session.first("##{library_id}").click
63
+ wait_for_selector('#annotations .kp-notebook-annotation-container', wait_time: 10)
64
+ title = doc.css('#annotations .kp-notebook-annotation-container h3.kp-notebook-metadata').try!(:text)
65
+ highlights_count, notes_count = fetch_highlights_and_notes
66
+ snapshot_page("Saving page for [#{title}] (#{library_id}) highlights:#{highlights_count} notes:#{notes_count}")
67
+ if title.present?
68
+ self.loaded_library_ids << library_id
69
+ else
70
+ self.failed_library_ids << library_id
71
+ log "[ERROR] Failed to load #{library_id} or this book doesn't have any highlights and notes"
72
+ end
73
+ end
74
+
75
+ def fetch_highlights_and_notes
76
+ highlights_count = notes_count = nil
77
+ 10.times do
78
+ sleep(1)
79
+ highlights_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-highlights-count').try!(:text)
80
+ notes_count = doc.css('#annotations .kp-notebook-annotation-container #kp-notebook-notes-count').try!(:text)
81
+ break if highlights_count != '--' && notes_count != '--'
82
+ end
83
+ [highlights_count, notes_count]
84
+ end
85
+
86
+ def report_failed_ids
87
+ log("May have failed with #{failed_library_ids.inspect}. Retry with client.adapter.session.first('#B000000000').click") if failed_library_ids.size > 0
88
+ end
89
+
90
+ def load
91
+ books = []
92
+ store.list_html_files.each do |file|
93
+ parser = KindleManager::HighlightsParser.new(file)
94
+ books += parser.parse
95
+ end
96
+ books.reject(&:invalid?).uniq(&:asin)
97
+ end
98
+
99
+ def snapshot_page(message = nil)
100
+ store.record_page
101
+ log(message.presence || "Saving page")
102
+ end
103
+ end
104
+ end
@@ -2,129 +2,50 @@ module KindleManager
2
2
  class Client
3
3
  include AmazonAuth::CommonExtension
4
4
 
5
- attr_accessor :session
5
+ attr_accessor :adapter
6
6
 
7
7
  def initialize(options = {})
8
- @limit = options.fetch(:limit, nil)
9
- @max_scroll_attempts = options.fetch(:max_scroll_attempts, 20)
10
8
  @options = options
11
9
  @client = AmazonAuth::Client.new(@options)
12
10
  extend(AmazonAuth::SessionExtension)
13
11
  end
14
12
 
15
13
  def session
16
- @session ||= @client.session
14
+ @_session ||= @client.session
17
15
  end
18
16
 
19
- def store
20
- @store ||= KindleManager::FileStore.new(@options)
21
- end
22
-
23
- def setup_file_store
24
- store.session = session
25
- log "Directory for downloaded pages is #{store.base_dir}"
17
+ def sign_in
18
+ @client.sign_in
26
19
  end
27
20
 
28
21
  def fetch_kindle_list
29
22
  sign_in
30
- setup_file_store
31
- go_to_kindle_management_page
32
- begin
33
- load_next_kindle_list
34
- rescue => e
35
- puts "[ERROR] #{e}"
36
- puts e.backtrace
37
- puts
38
- puts "Retry manually -> load_next_kindle_list or session etc."
39
- end
23
+ set_adapter(:books, @options.merge(session: session))
24
+ adapter.fetch
40
25
  end
41
26
 
42
- def load_kindle_books
43
- books = []
44
- store.list_html_files.each do |file|
45
- parser = KindleManager::ListParser.new(file)
46
- books += parser.book_list
47
- end
48
- books.uniq(&:asin)
49
- end
50
-
51
- def sign_in
52
- @client.sign_in
27
+ def fetch_kindle_highlights
28
+ sign_in
29
+ set_adapter(:highlights, @options.merge(session: session))
30
+ adapter.fetch
53
31
  end
54
32
 
55
- def go_to_kindle_management_page
56
- log "Visiting kindle management page"
57
- wait_for_selector('#shopAllLinks', wait_time: 5)
58
- 3.times do
59
- link = links_for('#navFooter a').find{|link| link =~ %r{/gp/digital/fiona/manage/} }
60
- session.visit link
61
- wait_for_selector('.navHeader_myx')
62
- if session.first('.navHeader_myx')
63
- log "Page found '#{session.first('.navHeader_myx').text}'"
64
- break
65
- end
66
- end
33
+ def load_kindle_books
34
+ set_adapter(:books, @options.except(:create))
35
+ adapter.load
67
36
  end
68
37
 
69
- def load_next_kindle_list
70
- wait_for_selector('.contentCount_myx')
71
- @current_loop = 0
72
- while @current_loop <= @max_scroll_attempts
73
- if @limit && @limit < number_of_fetched_books
74
- break
75
- elsif has_more_button?
76
- snapshot_page
77
- @current_loop = 0
78
-
79
- log "Clicking 'Show More'"
80
- session.execute_script "window.scrollBy(0,-800)"
81
- show_more_button.click
82
- sleep 1
83
- raise('Clicking of more button may have failed') if has_more_button?
84
- else
85
- log "Loading books with scrolling #{@current_loop+1}"
86
- session.execute_script "window.scrollBy(0,10000)"
87
- end
88
- sleep fetching_interval
89
- @current_loop += 1
90
- end
91
- log "Stopped loading"
92
- snapshot_page
38
+ def load_kindle_highlights
39
+ set_adapter(:highlights, @options.except(:create))
40
+ adapter.load
93
41
  end
94
42
 
95
43
  def quit
96
44
  session.driver.quit
97
45
  end
98
46
 
99
- def has_more_button?
100
- !!show_more_button
101
- end
102
-
103
- def show_more_button
104
- session.all('#contentTable_showMore_myx').find{|e| e['outerHTML'].match(/showmore_button/) }
105
- end
106
-
107
- def number_of_fetched_books
108
- re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)〜(\d+)/ : /(\d+) - (\d+)/)
109
- wait_for_selector('.contentCount_myx')
110
- text = session.first('.contentCount_myx').text
111
- m = text.match(re)
112
- return m[2].to_i if m.present?
113
- raise("Couldn't get the number of fetched books [#{text}]")
114
- end
115
-
116
- def loading?
117
- session.first('.myx-popover-loading-wrapper').present?
118
- end
119
-
120
- def snapshot_page
121
- log "Current page [#{session.first('.contentCount_myx').text}]" if session.first('.contentCount_myx')
122
- store.record_page
123
- log "Saving page"
124
- end
125
-
126
- def fetching_interval
127
- @options.fetch(:fetching_interval, 3)
47
+ def set_adapter(type, options)
48
+ @adapter = "KindleManager::#{type.to_s.camelize}Adapter".constantize.new(options.merge(sub_dir: type))
128
49
  end
129
50
  end
130
51
  end
@@ -3,6 +3,7 @@ module KindleManager
3
3
  attr_accessor :dir_name, :session
4
4
 
5
5
  def initialize(options = {})
6
+ @sub_dir = options.fetch(:sub_dir, 'books').to_s
6
7
  @dir_name = options.fetch(:dir_name) do
7
8
  tmp_dir_name = options[:create] ? nil : find_latest_dir_name
8
9
  tmp_dir_name.presence || Time.current.strftime("%Y%m%d%H%M%S")
@@ -10,12 +11,28 @@ module KindleManager
10
11
  @session = options.fetch(:session, nil)
11
12
  end
12
13
 
14
+ def downloads_dir
15
+ 'downloads'
16
+ end
17
+
18
+ def root_dir
19
+ File.join(downloads_dir, @sub_dir)
20
+ end
21
+
13
22
  def base_dir
14
- File.join(self.class.downloads_dir, @dir_name)
23
+ File.join(root_dir, @dir_name)
15
24
  end
16
25
 
17
- def self.downloads_dir
18
- 'downloads'
26
+ def list_work_dirs
27
+ Dir["#{root_dir}/*"].select{|f| File.directory? f }
28
+ end
29
+
30
+ def find_latest_dir_name
31
+ list_work_dirs.sort.last.to_s.split('/').last
32
+ end
33
+
34
+ def list_html_files(dir = nil)
35
+ Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
19
36
  end
20
37
 
21
38
  def html_path(time)
@@ -32,26 +49,6 @@ module KindleManager
32
49
  @session.save_screenshot(image_path(time))
33
50
  end
34
51
 
35
- def self.list_download_dirs
36
- Dir["#{downloads_dir}/*"].select{|f| File.directory? f }
37
- end
38
-
39
- def self.list_html_files(dir = nil)
40
- if dir
41
- Dir[File.join(downloads_dir, dir,'*.html')].select{|f| File.file? f }
42
- else
43
- Dir["#{downloads_dir}/*/*.html"].select{|f| File.file? f }
44
- end
45
- end
46
-
47
- def list_html_files
48
- self.class.list_html_files(@dir_name)
49
- end
50
-
51
- def find_latest_dir_name
52
- self.class.list_download_dirs.sort.last.to_s.split('/').last
53
- end
54
-
55
52
  private
56
53
 
57
54
  def build_filepath(time, ext)
@@ -0,0 +1,16 @@
1
+ module KindleManager
2
+ class BaseParser
3
+
4
+ def initialize(filepath, options = {})
5
+ @filepath = filepath
6
+ end
7
+
8
+ def doc
9
+ @doc ||= Nokogiri::HTML(body)
10
+ end
11
+
12
+ def body
13
+ @body ||= File.read(@filepath)
14
+ end
15
+ end
16
+ end
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
- class ListParser
2
+ class BooksParser < BaseParser
3
3
  class BookRow
4
+ include KindleManager::Parsers::Common
5
+
4
6
  def initialize(node)
5
7
  @node = node
6
8
  end
@@ -26,15 +28,7 @@ module KindleManager
26
28
  end
27
29
 
28
30
  def date
29
- @_date ||= begin
30
- date_text = @node.css("div[id^='date']").text
31
- begin
32
- Date.parse(date_text)
33
- rescue ArgumentError => e
34
- m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
35
- Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
36
- end
37
- end
31
+ @_date ||= parse_date(@node.css("div[id^='date']").text)
38
32
  end
39
33
 
40
34
  def collection_count
@@ -50,20 +44,8 @@ module KindleManager
50
44
  end
51
45
  end
52
46
 
53
- def initialize(filepath, options = {})
54
- @filepath = filepath
55
- end
56
-
57
- def book_list
58
- @book_list ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
59
- end
60
-
61
- def doc
62
- @doc ||= Nokogiri::HTML(body)
63
- end
64
-
65
- def body
66
- @body ||= File.read(@filepath)
47
+ def parse
48
+ @_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
67
49
  end
68
50
  end
69
51
  end
@@ -0,0 +1,16 @@
1
+ module KindleManager
2
+ module Parsers
3
+ module Common
4
+
5
+ def parse_date(date_text)
6
+ begin
7
+ Date.parse(date_text)
8
+ rescue ArgumentError => e
9
+ m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/)
10
+ m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
11
+ Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,91 @@
1
+ module KindleManager
2
+ class HighlightsParser < BaseParser
3
+ class BookWithNote
4
+ include KindleManager::Parsers::Common
5
+
6
+ def initialize(node)
7
+ @node = node
8
+ end
9
+
10
+ def inspect
11
+ "#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
12
+ end
13
+
14
+ def asin
15
+ @_asin ||= @node.css('#kp-notebook-annotations-asin').first['value']
16
+ end
17
+
18
+ def title
19
+ @_title ||= @node.css('h3.kp-notebook-metadata').text
20
+ end
21
+
22
+ def author
23
+ @_author ||= @node.css('h1.kp-notebook-metadata').first.text
24
+ end
25
+
26
+ def last_annotated_on
27
+ @_last_annotated_on ||= parse_date(@node.css('#kp-notebook-annotated-date').text)
28
+ end
29
+
30
+ def highlights_count
31
+ @_highlights_count ||= @node.css('.kp-notebook-highlight').size
32
+ end
33
+
34
+ def notes_count
35
+ @_notes_count ||= @node.css('.kp-notebook-note').reject{|e| e['class'] =~ /aok-hidden/ }.size
36
+ end
37
+
38
+ def highlights_and_notes
39
+ @_highlights_and_notes ||= begin
40
+ # Excluding the first element which has book info
41
+ @node.css('.a-spacing-base')[1..-1].map do |node|
42
+ location = node.css('#kp-annotation-location').first['value'].to_i
43
+ highlight_node = node.css('.kp-notebook-highlight').first
44
+ highlight = highlight_node && highlight_node.css('#highlight').first.text
45
+ color = highlight_node && highlight_node['class'].split.find{|v| v =~ /kp-notebook-highlight-/ }.split('-').last
46
+ note = node.css('#note').first.text
47
+ {'location' => location, 'highlight' => highlight, 'color' => color, 'note' => note}
48
+ end
49
+ end
50
+ end
51
+
52
+ def highlights
53
+ highlights_and_notes.reject{|e| e['highlight'].blank? }
54
+ end
55
+
56
+ def notes
57
+ highlights_and_notes.reject{|e| e['note'].blank? }
58
+ end
59
+
60
+ # This can be used to verify the count of highlights and notes
61
+ def count_summary
62
+ @_count_summary ||= begin
63
+ text = @node.css('h1.kp-notebook-metadata').last.text.strip
64
+ a, b = text.split('|').map{|text| m = text.match(/\d+/); m.nil? ? nil : m[0].to_i }
65
+ {'text' => text, 'highlights_count' => a, 'notes_count' => b}
66
+ end
67
+ end
68
+
69
+ def to_hash
70
+ hash = {}
71
+ %w[asin title author last_annotated_on highlights_count notes_count highlights_and_notes].each do |f|
72
+ hash[f] = send(f)
73
+ end
74
+ hash
75
+ end
76
+
77
+ def invalid?
78
+ !!(asin.blank? || count_summary['text'] =~ /--/)
79
+ end
80
+ end
81
+
82
+ def parse
83
+ @_parsed ||= begin
84
+ result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
85
+ puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
86
+ puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
87
+ result
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,8 +1,14 @@
1
1
  require 'amazon_auth'
2
2
  require "kindle_manager/version"
3
+ require "kindle_manager/adapters/base_adapter"
4
+ require "kindle_manager/adapters/books_adapter"
5
+ require "kindle_manager/adapters/highlights_adapter"
3
6
  require "kindle_manager/client"
4
7
  require "kindle_manager/file_store"
5
- require "kindle_manager/list_parser"
8
+ require "kindle_manager/parsers/common"
9
+ require "kindle_manager/parsers/base_parser"
10
+ require "kindle_manager/parsers/books_parser"
11
+ require "kindle_manager/parsers/highlights_parser"
6
12
 
7
13
  module KindleManager
8
14
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-06-17 00:00:00.000000000 Z
11
+ date: 2017-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -98,9 +98,15 @@ files:
98
98
  - bin/setup
99
99
  - kindle_manager.gemspec
100
100
  - lib/kindle_manager.rb
101
+ - lib/kindle_manager/adapters/base_adapter.rb
102
+ - lib/kindle_manager/adapters/books_adapter.rb
103
+ - lib/kindle_manager/adapters/highlights_adapter.rb
101
104
  - lib/kindle_manager/client.rb
102
105
  - lib/kindle_manager/file_store.rb
103
- - lib/kindle_manager/list_parser.rb
106
+ - lib/kindle_manager/parsers/base_parser.rb
107
+ - lib/kindle_manager/parsers/books_parser.rb
108
+ - lib/kindle_manager/parsers/common.rb
109
+ - lib/kindle_manager/parsers/highlights_parser.rb
104
110
  - lib/kindle_manager/version.rb
105
111
  homepage: https://github.com/kyamaguchi/kindle_manager
106
112
  licenses: