kindle_manager 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38b4108ec337ceae8173babf86fb0adac4550f06f68aab4913228b411ff9ae85
4
- data.tar.gz: d517f89c420a47a9403d2caafa476f83b6c9f7056dd0963a7ee096f5e3893507
3
+ metadata.gz: a490117c413d9e85320298a280d32da6a1c0af1f97059af1cc89affbd40fc284
4
+ data.tar.gz: 6b1b31d1d72eedbe4605ea6b61ae243b624e36971aba14c31389f092f9a121f8
5
5
  SHA512:
6
- metadata.gz: 55a646ff5b8d6c7487a83ae6468763b783e5787a91a4b12be622c90b603ebbe66a0ac39e4c18c9ad3038a5d4f7a54855ef53123e5db760f49678af37c29925ed
7
- data.tar.gz: 69ae1b4a50c7708625f74b2aa027aee315e81103f06f14f24e4f0b216fdd81ca53d49fe9c1beea177e14b8d740b3df493dd0c5af440dee8b04d5bbddca79d38e
6
+ metadata.gz: a8d03445f3b6a517c1dffba41a214e37340a40e641936521c36905109c5977cfe84f7239f365a18dd503cc07ecc766f31e647f0f3e5666997f43e7e0123b2a74
7
+ data.tar.gz: 5a0fad36eeda000586c97cf343a83792f5ee98c12110fd248a24a5f9327590c01ba16b6c945e44725c7bcb281e5ccd2fb69ad2f71e9fa1b0481b1b571e2a3373
data/README.md CHANGED
@@ -142,11 +142,6 @@ Login and password: `login: 'xxx', password: 'yyy'`
142
142
 
143
143
  Output debug log: `debug: true`
144
144
 
145
- ## Issues
146
-
147
- There may be problems with capybara 3.
148
- Use older version with `gem 'capybara', '~> 2.18.0'` in that case.
149
-
150
145
  ## TODO
151
146
 
152
147
  - Limit the number of fetching books by date
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_runtime_dependency "amazon_auth", "~> 0.6.0"
24
+ spec.add_runtime_dependency "amazon_auth", "~> 0.8.0"
25
+ spec.add_runtime_dependency "webdrivers"
25
26
  spec.add_development_dependency "bundler"
26
27
  spec.add_development_dependency "rake"
27
28
  spec.add_development_dependency "rspec"
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
2
  class BooksAdapter < BaseAdapter
3
- URL_FOR_KINDLE_CONTENTS = 'https://www.amazon.co.jp/gp/digital/fiona/manage?ie=UTF8&ref_=nav_youraccount_myk'
3
+ def url_for_kindle_contents
4
+ "https://www.#{ENV['AMAZON_DOMAIN']}/hz/mycd/digital-console/contentlist/booksAll/dateDsc/"
5
+ end
4
6
 
5
7
  def fetch
6
8
  go_to_kindle_management_page
@@ -10,17 +12,17 @@ module KindleManager
10
12
  puts "[ERROR] #{e}"
11
13
  puts e.backtrace
12
14
  puts
13
- puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
15
+ puts "Investigate the error using 'client.session', 'client.doc' etc."
14
16
  end
15
17
  end
16
18
 
17
19
  def go_to_kindle_management_page
18
20
  log "Visiting kindle management page"
19
21
  3.times do
20
- session.visit URL_FOR_KINDLE_CONTENTS
21
- wait_for_selector('.navHeader_title_myx')
22
- if session.has_css?('.navHeader_title_myx')
23
- log "Page found '#{session.first('.navHeader_title_myx').text}'"
22
+ session.visit url_for_kindle_contents
23
+ wait_for_selector('#content-page-title')
24
+ if session.has_css?('#content-page-title')
25
+ log "Page found '#{session.first('#content-page-title').text}'"
24
26
  break
25
27
  else
26
28
  submit_signin_form
@@ -29,34 +31,15 @@ module KindleManager
29
31
  end
30
32
 
31
33
  def load_next_kindle_list
32
- wait_for_selector('.contentCount_myx')
33
- current_loop = 0
34
- last_page_scroll_offset = page_scroll_offset
35
- while current_loop <= max_scroll_attempts
36
- break if limit && limit < number_of_fetched_books
37
- if has_more_button?
38
- snapshot_page
39
- current_loop = 0
40
-
41
- log "Clicking 'Show More'"
42
- session.execute_script "window.scrollBy(0,-800)"
43
- show_more_button.click
44
- sleep fetching_interval
45
- else
46
- log "Loading books with scrolling #{current_loop+1}"
47
- session.execute_script "window.scrollBy(0,10000)"
48
- end
34
+ wait_for_selector('#CONTENT_COUNT')
35
+ loop do
36
+ snapshot_page
37
+ break if current_page == max_page
38
+ break if limit && limit <= number_of_fetched_books
39
+ session.first("#page-#{current_page + 1}").click
49
40
  sleep fetching_interval
50
- if last_page_scroll_offset == page_scroll_offset
51
- log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
52
- break
53
- else
54
- debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
55
- end
56
- last_page_scroll_offset = page_scroll_offset
57
- current_loop += 1
58
41
  end
59
- log "Stopped loading. You may want to resume with 'client.adapter.load_next_kindle_list'"
42
+ log "Stopped loading. You may want to continue with 'client.session', 'client.doc' etc."
60
43
  snapshot_page
61
44
  end
62
45
 
@@ -69,33 +52,26 @@ module KindleManager
69
52
  books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
70
53
  end
71
54
 
72
- def page_scroll_offset
73
- session.evaluate_script('window.pageYOffset')
74
- end
75
-
76
- def has_more_button?
77
- !!show_more_button
55
+ def current_page
56
+ doc.css('#pagination a.active').first.text.to_i
78
57
  end
79
58
 
80
- def show_more_button
81
- session.all('.contentTableShowMore_myx').find{|e| e['outerHTML'].match(/cnt_shw_more/) }
59
+ def max_page
60
+ @_max_page ||= doc.css('#pagination a').last.text.to_i
82
61
  end
83
62
 
84
63
  def number_of_fetched_books
85
- re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)(\d+)/ : /(\d+) - (\d+)/)
86
- wait_for_selector('.contentCount_myx')
87
- text = doc.css('.contentCount_myx').text
64
+ re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)から(\d+)/ : / (\d+) to (\d+) /)
65
+ wait_for_selector('#CONTENT_COUNT')
66
+ text = doc.css('#CONTENT_COUNT').text
67
+ log "Number of books: [#{text}]"
88
68
  m = text.match(re)
89
69
  return m[2].to_i if m.present?
90
70
  raise("Couldn't get the number of fetched books [#{text}]")
91
71
  end
92
72
 
93
- def loading?
94
- session.first('.myx-popover-loading-wrapper').present?
95
- end
96
-
97
73
  def snapshot_page
98
- if (text = doc.css('.contentCount_myx').try!(:text)).present?
74
+ if (text = doc.css('#CONTENT_COUNT').try!(:text)).present?
99
75
  log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
100
76
  end
101
77
  store.record_page
@@ -13,28 +13,33 @@ module KindleManager
13
13
  "#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
14
14
  end
15
15
 
16
+ def title_node
17
+ # Possible to use "div[id^='content-title-']"
18
+ @_title_node ||= @node.css('.digital_entity_title').first
19
+ end
20
+
16
21
  def asin
17
- @_asin ||= @node['name'].gsub(/\AcontentTabList_/, '')
22
+ @_asin ||= title_node.attributes['id'].value.remove('content-title-')
18
23
  end
19
24
 
20
25
  def title
21
- @_title ||= @node.css("div[id^='title']").text
26
+ @_title ||= title_node.text
22
27
  end
23
28
 
24
29
  def tag
25
- @_tag ||= @node.css("div[id^='listViewTitleTag']").css('.myx-text-bold').first.text.strip
30
+ @_tag ||= @node.css('.information_row.tags').first&.text&.strip
26
31
  end
27
32
 
28
33
  def author
29
- @_author ||= @node.css("div[id^='author']").text
34
+ @_author ||= @node.css("div[id^='content-author-']").text
30
35
  end
31
36
 
32
37
  def date
33
- @_date ||= parse_date(@node.css("div[id^='date']").text)
38
+ @_date ||= parse_date(@node.css("div[id^='content-acquired-date-']").text)
34
39
  end
35
40
 
36
41
  def collection_count
37
- @_collection_count ||= @node.css(".collectionsCount .myx-collection-count").first.text.strip.to_i
42
+ @_collection_count ||= @node.css('.dropdown_count').first&.text&.strip.to_i
38
43
  end
39
44
 
40
45
  def to_hash
@@ -48,7 +53,7 @@ module KindleManager
48
53
 
49
54
  def parse
50
55
  @_parsed ||= begin
51
- doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e, fetched_at: fetched_at) }
56
+ doc.css('#CONTENT_LIST table tbody tr').map{|e| BookRow.new(e, fetched_at: fetched_at) }
52
57
  end
53
58
  end
54
59
  end
@@ -11,7 +11,7 @@ module KindleManager
11
11
  begin
12
12
  Date.parse(date_text)
13
13
  rescue ArgumentError => e
14
- m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
14
+ m = date_text.match(/(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
15
15
  m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
16
16
  raise("Failed to parse date [#{date_text}]") if m.nil?
17
17
  Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.7.0"
2
+ VERSION = "0.8.0"
3
3
  end
@@ -9,6 +9,7 @@ require "kindle_manager/parsers/common"
9
9
  require "kindle_manager/parsers/base_parser"
10
10
  require "kindle_manager/parsers/books_parser"
11
11
  require "kindle_manager/parsers/highlights_parser"
12
+ require 'webdrivers'
12
13
 
13
14
  module KindleManager
14
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-12-05 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -16,14 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.6.0
19
+ version: 0.8.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.6.0
26
+ version: 0.8.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: webdrivers
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -127,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
127
141
  - !ruby/object:Gem::Version
128
142
  version: '0'
129
143
  requirements: []
130
- rubygems_version: 3.1.6
144
+ rubygems_version: 3.2.33
131
145
  signing_key:
132
146
  specification_version: 4
133
147
  summary: Scrape information of kindle books from amazon site