kindle_manager 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 73c95f02f9f63d413e423a014c592451b3856fbe
4
- data.tar.gz: 12d55a94e7a21ea0b748619a1a936d3a02caeaab
3
+ metadata.gz: 5df40793942c890de33041d60b6e27efe076a695
4
+ data.tar.gz: 31a5cbce41c6b166de7954cb876f041fc4ddf3ec
5
5
  SHA512:
6
- metadata.gz: c3f7f1da0d291473403a887fca2537f29843e73cd1400e40d52090d9bad9bead1e21b4b7f86f73c88e5c9a23f397ee6c7b8f66d1b93f39972c361690e66a3f06
7
- data.tar.gz: 49a31303b797f8c844133b36e37a0523aea887f72ef69ec61626df06321693ae2d89bb669415b19ea3fbc3a9f07ca89636bc0bd8425086476277d87ad1b1a408
6
+ metadata.gz: 4d9ac40336b50f1f6f10c201f11eef301eb11ae373e8bd9fd88c5167edd00e4cf6edc95fda3b82f2436d440d1dff8f9ca8e459708967d66fd0756086927a6b30
7
+ data.tar.gz: 315a3ae71152310075fe4d855355bd93c280af92c6d400f2bbb5ae2333941380f598b8092373f3aa3d63b0a371162e1a050407e772b437abe181a96b28b17c11
data/.gitignore CHANGED
@@ -10,8 +10,8 @@
10
10
  /.env
11
11
  /.byebug_history
12
12
  /.ruby-version
13
- /downloads/
14
13
  /spec/fixtures/files/
14
+ /spec/fixtures/tmp/
15
15
 
16
16
  # rspec failure tracking
17
17
  .rspec_status
data/README.md CHANGED
@@ -54,7 +54,7 @@ In console
54
54
 
55
55
  ```ruby
56
56
  require 'kindle_manager'
57
- client = KindleManager::Client.new(verbose: true, limit: 1000)
57
+ client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 1000)
58
58
  client.fetch_kindle_list
59
59
 
60
60
  books = client.load_kindle_books
@@ -89,7 +89,7 @@ In console
89
89
 
90
90
  ```ruby
91
91
  require 'kindle_manager'
92
- client = KindleManager::Client.new(verbose: true, limit: 10)
92
+ client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 10)
93
93
  client.fetch_kindle_highlights
94
94
 
95
95
  books = client.load_kindle_highlights
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_runtime_dependency "amazon_auth", "~> 0.3.2"
24
+ spec.add_runtime_dependency "amazon_auth", "~> 0.4.0"
25
25
  spec.add_development_dependency "bundler", "~> 1.14"
26
26
  spec.add_development_dependency "rake", "~> 10.0"
27
27
  spec.add_development_dependency "rspec", "~> 3.0"
@@ -10,7 +10,7 @@ module KindleManager
10
10
  extend(AmazonAuth::SessionExtension)
11
11
 
12
12
  @store = KindleManager::FileStore.new(options.merge(session: @session))
13
- log "Directory for downloaded pages is #{store.base_dir}"
13
+ log "Directory for downloaded pages is #{store.target_dir}"
14
14
  end
15
15
 
16
16
  def limit
@@ -22,6 +22,8 @@ module KindleManager
22
22
  if session.first('.navHeader_myx')
23
23
  log "Page found '#{session.first('.navHeader_myx').text}'"
24
24
  break
25
+ else
26
+ submit_signin_form
25
27
  end
26
28
  end
27
29
  end
@@ -29,10 +31,10 @@ module KindleManager
29
31
  def load_next_kindle_list
30
32
  wait_for_selector('.contentCount_myx')
31
33
  current_loop = 0
34
+ last_page_scroll_offset = page_scroll_offset
32
35
  while current_loop <= max_scroll_attempts
33
- if limit && limit < number_of_fetched_books
34
- break
35
- elsif has_more_button?
36
+ break if limit && limit < number_of_fetched_books
37
+ if has_more_button?
36
38
  snapshot_page
37
39
  current_loop = 0
38
40
 
@@ -46,9 +48,16 @@ module KindleManager
46
48
  session.execute_script "window.scrollBy(0,10000)"
47
49
  end
48
50
  sleep fetching_interval
51
+ if last_page_scroll_offset == page_scroll_offset
52
+ log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
53
+ break
54
+ else
55
+ debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
56
+ end
57
+ last_page_scroll_offset = page_scroll_offset
49
58
  current_loop += 1
50
59
  end
51
- log "Stopped loading"
60
+ log "Stopped loading. You may want to resume with 'client.adapter.load_next_kindle_list'"
52
61
  snapshot_page
53
62
  end
54
63
 
@@ -58,7 +67,11 @@ module KindleManager
58
67
  parser = KindleManager::BooksParser.new(file)
59
68
  books += parser.parse
60
69
  end
61
- books.uniq(&:asin)
70
+ books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
71
+ end
72
+
73
+ def page_scroll_offset
74
+ session.evaluate_script('window.pageYOffset')
62
75
  end
63
76
 
64
77
  def has_more_button?
@@ -100,7 +100,7 @@ module KindleManager
100
100
  parser = KindleManager::HighlightsParser.new(file)
101
101
  books += parser.parse
102
102
  end
103
- books.reject(&:invalid?).uniq(&:asin)
103
+ books.reject(&:invalid?).sort_by{|b| [-b.last_annotated_on.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
104
104
  end
105
105
 
106
106
  def snapshot_page(message = nil)
@@ -2,7 +2,7 @@ module KindleManager
2
2
  class Client
3
3
  include AmazonAuth::CommonExtension
4
4
 
5
- attr_accessor :adapter
5
+ attr_accessor :adapter, :options
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
@@ -14,10 +14,6 @@ module KindleManager
14
14
  @_session ||= @client.session
15
15
  end
16
16
 
17
- def sign_in
18
- @client.sign_in
19
- end
20
-
21
17
  def fetch_kindle_list
22
18
  sign_in
23
19
  set_adapter(:books, @options.merge(session: session))
@@ -25,8 +21,7 @@ module KindleManager
25
21
  end
26
22
 
27
23
  def fetch_kindle_highlights
28
- session.visit KindleManager::HighlightsAdapter::KINDLE_HIGHLIGHT_URL
29
- @client.submit_signin_form
24
+ sign_in KindleManager::HighlightsAdapter::KINDLE_HIGHLIGHT_URL
30
25
  set_adapter(:highlights, @options.merge(session: session))
31
26
  adapter.fetch
32
27
  end
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
2
  class FileStore
3
- attr_accessor :dir_name, :session
3
+ TIME_FORMAT_FOR_FILENAME = '%Y%m%d%H%M%S'
4
+
5
+ attr_accessor :sub_dir, :dir_name, :session
4
6
 
5
7
  def initialize(options = {})
6
8
  @sub_dir = options.fetch(:sub_dir, 'books').to_s
@@ -11,28 +13,20 @@ module KindleManager
11
13
  @session = options.fetch(:session, nil)
12
14
  end
13
15
 
14
- def downloads_dir
15
- 'downloads'
16
- end
17
-
18
- def root_dir
19
- File.join(downloads_dir, @sub_dir)
20
- end
21
-
22
- def base_dir
23
- File.join(root_dir, @dir_name)
16
+ def target_dir
17
+ File.join(sub_dir, dir_name)
24
18
  end
25
19
 
26
20
  def list_work_dirs
27
- Dir["#{root_dir}/*"].select{|f| File.directory? f }
21
+ Dir[File.join(Capybara.save_path, sub_dir,'*')].select{|f| File.directory? f }
28
22
  end
29
23
 
30
24
  def find_latest_dir_name
31
25
  list_work_dirs.sort.last.to_s.split('/').last
32
26
  end
33
27
 
34
- def list_html_files(dir = nil)
35
- Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
28
+ def list_html_files
29
+ Dir[File.join(Capybara.save_path, target_dir,'*.html')].select{|f| File.file? f }
36
30
  end
37
31
 
38
32
  def html_path(time)
@@ -52,7 +46,7 @@ module KindleManager
52
46
  private
53
47
 
54
48
  def build_filepath(time, ext)
55
- File.join(base_dir, "#{time.strftime('%Y%m%d%H%M%S')}#{(time.usec / 1000.0).round.to_s.rjust(3,'0')}.#{ext}")
49
+ File.join(target_dir, "#{time.strftime(TIME_FORMAT_FOR_FILENAME)}#{(time.usec / 1000.0).round.to_s.rjust(3,'0')}.#{ext}")
56
50
  end
57
51
  end
58
52
  end
@@ -1,8 +1,15 @@
1
1
  module KindleManager
2
2
  class BaseParser
3
+ attr_accessor :fetched_at
3
4
 
4
5
  def initialize(filepath, options = {})
5
6
  @filepath = filepath
7
+
8
+ @fetched_at = if File.basename(@filepath) =~ /\A\d{14}/
9
+ Time.strptime(File.basename(@filepath)[0..14], KindleManager::FileStore::TIME_FORMAT_FOR_FILENAME)
10
+ else
11
+ File.ctime(@filepath)
12
+ end
6
13
  end
7
14
 
8
15
  def doc
@@ -1,10 +1,12 @@
1
1
  module KindleManager
2
2
  class BooksParser < BaseParser
3
3
  class BookRow
4
+
4
5
  include KindleManager::Parsers::Common
5
6
 
6
- def initialize(node)
7
+ def initialize(node, options = {})
7
8
  @node = node
9
+ @fetched_at = options[:fetched_at]
8
10
  end
9
11
 
10
12
  def inspect
@@ -45,7 +47,9 @@ module KindleManager
45
47
  end
46
48
 
47
49
  def parse
48
- @_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
50
+ @_parsed ||= begin
51
+ doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e, fetched_at: fetched_at) }
52
+ end
49
53
  end
50
54
  end
51
55
  end
@@ -1,6 +1,11 @@
1
1
  module KindleManager
2
2
  module Parsers
3
3
  module Common
4
+ extend ActiveSupport::Concern
5
+
6
+ included do
7
+ attr_accessor :fetched_at
8
+ end
4
9
 
5
10
  def parse_date(date_text)
6
11
  begin
@@ -3,8 +3,9 @@ module KindleManager
3
3
  class BookWithNote
4
4
  include KindleManager::Parsers::Common
5
5
 
6
- def initialize(node)
6
+ def initialize(node, options = {})
7
7
  @node = node
8
+ @fetched_at = options[:fetched_at]
8
9
  end
9
10
 
10
11
  def inspect
@@ -81,7 +82,7 @@ module KindleManager
81
82
 
82
83
  def parse
83
84
  @_parsed ||= begin
84
- result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
85
+ result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e, fetched_at: fetched_at) }
85
86
  puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
86
87
  puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
87
88
  result
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.3.2
19
+ version: 0.4.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.3.2
26
+ version: 0.4.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement