kindle_manager 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 73c95f02f9f63d413e423a014c592451b3856fbe
4
- data.tar.gz: 12d55a94e7a21ea0b748619a1a936d3a02caeaab
3
+ metadata.gz: 5df40793942c890de33041d60b6e27efe076a695
4
+ data.tar.gz: 31a5cbce41c6b166de7954cb876f041fc4ddf3ec
5
5
  SHA512:
6
- metadata.gz: c3f7f1da0d291473403a887fca2537f29843e73cd1400e40d52090d9bad9bead1e21b4b7f86f73c88e5c9a23f397ee6c7b8f66d1b93f39972c361690e66a3f06
7
- data.tar.gz: 49a31303b797f8c844133b36e37a0523aea887f72ef69ec61626df06321693ae2d89bb669415b19ea3fbc3a9f07ca89636bc0bd8425086476277d87ad1b1a408
6
+ metadata.gz: 4d9ac40336b50f1f6f10c201f11eef301eb11ae373e8bd9fd88c5167edd00e4cf6edc95fda3b82f2436d440d1dff8f9ca8e459708967d66fd0756086927a6b30
7
+ data.tar.gz: 315a3ae71152310075fe4d855355bd93c280af92c6d400f2bbb5ae2333941380f598b8092373f3aa3d63b0a371162e1a050407e772b437abe181a96b28b17c11
data/.gitignore CHANGED
@@ -10,8 +10,8 @@
10
10
  /.env
11
11
  /.byebug_history
12
12
  /.ruby-version
13
- /downloads/
14
13
  /spec/fixtures/files/
14
+ /spec/fixtures/tmp/
15
15
 
16
16
  # rspec failure tracking
17
17
  .rspec_status
data/README.md CHANGED
@@ -54,7 +54,7 @@ In console
54
54
 
55
55
  ```ruby
56
56
  require 'kindle_manager'
57
- client = KindleManager::Client.new(verbose: true, limit: 1000)
57
+ client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 1000)
58
58
  client.fetch_kindle_list
59
59
 
60
60
  books = client.load_kindle_books
@@ -89,7 +89,7 @@ In console
89
89
 
90
90
  ```ruby
91
91
  require 'kindle_manager'
92
- client = KindleManager::Client.new(verbose: true, limit: 10)
92
+ client = KindleManager::Client.new(keep_cookie: true, verbose: true, limit: 10)
93
93
  client.fetch_kindle_highlights
94
94
 
95
95
  books = client.load_kindle_highlights
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_runtime_dependency "amazon_auth", "~> 0.3.2"
24
+ spec.add_runtime_dependency "amazon_auth", "~> 0.4.0"
25
25
  spec.add_development_dependency "bundler", "~> 1.14"
26
26
  spec.add_development_dependency "rake", "~> 10.0"
27
27
  spec.add_development_dependency "rspec", "~> 3.0"
@@ -10,7 +10,7 @@ module KindleManager
10
10
  extend(AmazonAuth::SessionExtension)
11
11
 
12
12
  @store = KindleManager::FileStore.new(options.merge(session: @session))
13
- log "Directory for downloaded pages is #{store.base_dir}"
13
+ log "Directory for downloaded pages is #{store.target_dir}"
14
14
  end
15
15
 
16
16
  def limit
@@ -22,6 +22,8 @@ module KindleManager
22
22
  if session.first('.navHeader_myx')
23
23
  log "Page found '#{session.first('.navHeader_myx').text}'"
24
24
  break
25
+ else
26
+ submit_signin_form
25
27
  end
26
28
  end
27
29
  end
@@ -29,10 +31,10 @@ module KindleManager
29
31
  def load_next_kindle_list
30
32
  wait_for_selector('.contentCount_myx')
31
33
  current_loop = 0
34
+ last_page_scroll_offset = page_scroll_offset
32
35
  while current_loop <= max_scroll_attempts
33
- if limit && limit < number_of_fetched_books
34
- break
35
- elsif has_more_button?
36
+ break if limit && limit < number_of_fetched_books
37
+ if has_more_button?
36
38
  snapshot_page
37
39
  current_loop = 0
38
40
 
@@ -46,9 +48,16 @@ module KindleManager
46
48
  session.execute_script "window.scrollBy(0,10000)"
47
49
  end
48
50
  sleep fetching_interval
51
+ if last_page_scroll_offset == page_scroll_offset
52
+ log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
53
+ break
54
+ else
55
+ debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
56
+ end
57
+ last_page_scroll_offset = page_scroll_offset
49
58
  current_loop += 1
50
59
  end
51
- log "Stopped loading"
60
+ log "Stopped loading. You may want to resume with 'client.adapter.load_next_kindle_list'"
52
61
  snapshot_page
53
62
  end
54
63
 
@@ -58,7 +67,11 @@ module KindleManager
58
67
  parser = KindleManager::BooksParser.new(file)
59
68
  books += parser.parse
60
69
  end
61
- books.uniq(&:asin)
70
+ books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
71
+ end
72
+
73
+ def page_scroll_offset
74
+ session.evaluate_script('window.pageYOffset')
62
75
  end
63
76
 
64
77
  def has_more_button?
@@ -100,7 +100,7 @@ module KindleManager
100
100
  parser = KindleManager::HighlightsParser.new(file)
101
101
  books += parser.parse
102
102
  end
103
- books.reject(&:invalid?).uniq(&:asin)
103
+ books.reject(&:invalid?).sort_by{|b| [-b.last_annotated_on.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
104
104
  end
105
105
 
106
106
  def snapshot_page(message = nil)
@@ -2,7 +2,7 @@ module KindleManager
2
2
  class Client
3
3
  include AmazonAuth::CommonExtension
4
4
 
5
- attr_accessor :adapter
5
+ attr_accessor :adapter, :options
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
@@ -14,10 +14,6 @@ module KindleManager
14
14
  @_session ||= @client.session
15
15
  end
16
16
 
17
- def sign_in
18
- @client.sign_in
19
- end
20
-
21
17
  def fetch_kindle_list
22
18
  sign_in
23
19
  set_adapter(:books, @options.merge(session: session))
@@ -25,8 +21,7 @@ module KindleManager
25
21
  end
26
22
 
27
23
  def fetch_kindle_highlights
28
- session.visit KindleManager::HighlightsAdapter::KINDLE_HIGHLIGHT_URL
29
- @client.submit_signin_form
24
+ sign_in KindleManager::HighlightsAdapter::KINDLE_HIGHLIGHT_URL
30
25
  set_adapter(:highlights, @options.merge(session: session))
31
26
  adapter.fetch
32
27
  end
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
2
  class FileStore
3
- attr_accessor :dir_name, :session
3
+ TIME_FORMAT_FOR_FILENAME = '%Y%m%d%H%M%S'
4
+
5
+ attr_accessor :sub_dir, :dir_name, :session
4
6
 
5
7
  def initialize(options = {})
6
8
  @sub_dir = options.fetch(:sub_dir, 'books').to_s
@@ -11,28 +13,20 @@ module KindleManager
11
13
  @session = options.fetch(:session, nil)
12
14
  end
13
15
 
14
- def downloads_dir
15
- 'downloads'
16
- end
17
-
18
- def root_dir
19
- File.join(downloads_dir, @sub_dir)
20
- end
21
-
22
- def base_dir
23
- File.join(root_dir, @dir_name)
16
+ def target_dir
17
+ File.join(sub_dir, dir_name)
24
18
  end
25
19
 
26
20
  def list_work_dirs
27
- Dir["#{root_dir}/*"].select{|f| File.directory? f }
21
+ Dir[File.join(Capybara.save_path, sub_dir,'*')].select{|f| File.directory? f }
28
22
  end
29
23
 
30
24
  def find_latest_dir_name
31
25
  list_work_dirs.sort.last.to_s.split('/').last
32
26
  end
33
27
 
34
- def list_html_files(dir = nil)
35
- Dir[File.join(base_dir,'*.html')].select{|f| File.file? f }
28
+ def list_html_files
29
+ Dir[File.join(Capybara.save_path, target_dir,'*.html')].select{|f| File.file? f }
36
30
  end
37
31
 
38
32
  def html_path(time)
@@ -52,7 +46,7 @@ module KindleManager
52
46
  private
53
47
 
54
48
  def build_filepath(time, ext)
55
- File.join(base_dir, "#{time.strftime('%Y%m%d%H%M%S')}#{(time.usec / 1000.0).round.to_s.rjust(3,'0')}.#{ext}")
49
+ File.join(target_dir, "#{time.strftime(TIME_FORMAT_FOR_FILENAME)}#{(time.usec / 1000.0).round.to_s.rjust(3,'0')}.#{ext}")
56
50
  end
57
51
  end
58
52
  end
@@ -1,8 +1,15 @@
1
1
  module KindleManager
2
2
  class BaseParser
3
+ attr_accessor :fetched_at
3
4
 
4
5
  def initialize(filepath, options = {})
5
6
  @filepath = filepath
7
+
8
+ @fetched_at = if File.basename(@filepath) =~ /\A\d{14}/
9
+ Time.strptime(File.basename(@filepath)[0..14], KindleManager::FileStore::TIME_FORMAT_FOR_FILENAME)
10
+ else
11
+ File.ctime(@filepath)
12
+ end
6
13
  end
7
14
 
8
15
  def doc
@@ -1,10 +1,12 @@
1
1
  module KindleManager
2
2
  class BooksParser < BaseParser
3
3
  class BookRow
4
+
4
5
  include KindleManager::Parsers::Common
5
6
 
6
- def initialize(node)
7
+ def initialize(node, options = {})
7
8
  @node = node
9
+ @fetched_at = options[:fetched_at]
8
10
  end
9
11
 
10
12
  def inspect
@@ -45,7 +47,9 @@ module KindleManager
45
47
  end
46
48
 
47
49
  def parse
48
- @_parsed ||= doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e) }
50
+ @_parsed ||= begin
51
+ doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e, fetched_at: fetched_at) }
52
+ end
49
53
  end
50
54
  end
51
55
  end
@@ -1,6 +1,11 @@
1
1
  module KindleManager
2
2
  module Parsers
3
3
  module Common
4
+ extend ActiveSupport::Concern
5
+
6
+ included do
7
+ attr_accessor :fetched_at
8
+ end
4
9
 
5
10
  def parse_date(date_text)
6
11
  begin
@@ -3,8 +3,9 @@ module KindleManager
3
3
  class BookWithNote
4
4
  include KindleManager::Parsers::Common
5
5
 
6
- def initialize(node)
6
+ def initialize(node, options = {})
7
7
  @node = node
8
+ @fetched_at = options[:fetched_at]
8
9
  end
9
10
 
10
11
  def inspect
@@ -81,7 +82,7 @@ module KindleManager
81
82
 
82
83
  def parse
83
84
  @_parsed ||= begin
84
- result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e) }
85
+ result = doc.css('.kp-notebook-annotation-container').map{|e| BookWithNote.new(e, fetched_at: fetched_at) }
85
86
  puts "[DEBUG] This page(#{@filepath}) has many books. asin -> #{result.map(&:asin).join(',')}" if result.size >= 2
86
87
  puts "[DEBUG] Incomplete page(#{@filepath}). asin:#{result.first.asin} #{result.first.title} (#{result.first.count_summary['text'].inspect})" if result.any?(&:invalid?)
87
88
  result
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.3.2
19
+ version: 0.4.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.3.2
26
+ version: 0.4.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement