kindle_manager 0.6.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 94b27bae732e8c8a50d463380a7fbcc499ad5ea46f810bed185ed469f3ae0c8b
4
- data.tar.gz: 8f902074660f6774d95a32e37afafa96e06cbfedfc7cc9ee20b215f3f83364e9
3
+ metadata.gz: a490117c413d9e85320298a280d32da6a1c0af1f97059af1cc89affbd40fc284
4
+ data.tar.gz: 6b1b31d1d72eedbe4605ea6b61ae243b624e36971aba14c31389f092f9a121f8
5
5
  SHA512:
6
- metadata.gz: d759c1c3b06ec52cb47dfc5ebbdfbe29693899758b55b66853c0a6d088edf5c2e53e0a383d7939d6021f64b009c05d03bc3b4d9501753579194bd190dddc0fe0
7
- data.tar.gz: c4bf70715590d1e9c7840e9736a1bfcca77369ed0370af5afa1f31dee2a32b423769f39fd52db71c8a14e915eea78bd2c7d89e122c4b7875587c51540448bb3e
6
+ metadata.gz: a8d03445f3b6a517c1dffba41a214e37340a40e641936521c36905109c5977cfe84f7239f365a18dd503cc07ecc766f31e647f0f3e5666997f43e7e0123b2a74
7
+ data.tar.gz: 5a0fad36eeda000586c97cf343a83792f5ee98c12110fd248a24a5f9327590c01ba16b6c945e44725c7bcb281e5ccd2fb69ad2f71e9fa1b0481b1b571e2a3373
@@ -0,0 +1,21 @@
1
+ version: 2.1
2
+
3
+ jobs:
4
+ test:
5
+ parameters:
6
+ ruby-version:
7
+ type: string
8
+ docker:
9
+ - image: cimg/ruby:<< parameters.ruby-version >>-browsers
10
+ steps:
11
+ - checkout
12
+ - run: bundle install
13
+ - run: bundle exec rspec
14
+
15
+ workflows:
16
+ all-tests:
17
+ jobs:
18
+ - test:
19
+ matrix:
20
+ parameters:
21
+ ruby-version: ["2.7.5", "3.0.3"]
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # KindleManager
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
4
- [![Build Status](https://travis-ci.org/kyamaguchi/kindle_manager.svg?branch=master)](https://travis-ci.org/kyamaguchi/kindle_manager)
4
+ [![CircleCI](https://circleci.com/gh/kyamaguchi/kindle_manager.svg?style=svg)](https://circleci.com/gh/kyamaguchi/kindle_manager)
5
5
 
6
6
  Scrape information of kindle books & highlights from amazon site
7
7
 
@@ -34,7 +34,7 @@ Or install it yourself as:
34
34
 
35
35
  ### Setup
36
36
 
37
- [chromedriver](https://sites.google.com/a/chromium.org/chromedriver/downloads) is required. Please [download chromedriver](http://chromedriver.storage.googleapis.com/index.html) and update chromedriver regularly.
37
+ [chromedriver](https://sites.google.com/chromium.org/driver/) is required. Please [download chromedriver](https://chromedriver.storage.googleapis.com/index.html) and update chromedriver regularly.
38
38
 
39
39
  Create _.env_ following the instructions of https://github.com/kyamaguchi/amazon_auth
40
40
 
@@ -142,11 +142,6 @@ Login and password: `login: 'xxx', password: 'yyy'`
142
142
 
143
143
  Output debug log: `debug: true`
144
144
 
145
- ## Issues
146
-
147
- There may be problems with capybara 3.
148
- Use older version with `gem 'capybara', '~> 2.18.0'` in that case.
149
-
150
145
  ## TODO
151
146
 
152
147
  - Limit the number of fetching books by date
@@ -21,9 +21,10 @@ Gem::Specification.new do |spec|
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_runtime_dependency "amazon_auth", "~> 0.4.0"
24
+ spec.add_runtime_dependency "amazon_auth", "~> 0.8.0"
25
+ spec.add_runtime_dependency "webdrivers"
25
26
  spec.add_development_dependency "bundler"
26
- spec.add_development_dependency "rake", "~> 13.0"
27
- spec.add_development_dependency "rspec", "~> 3.0"
27
+ spec.add_development_dependency "rake"
28
+ spec.add_development_dependency "rspec"
28
29
  spec.add_development_dependency "byebug"
29
30
  end
@@ -1,6 +1,8 @@
1
1
  module KindleManager
2
2
  class BooksAdapter < BaseAdapter
3
- URL_FOR_KINDLE_CONTENTS = 'https://www.amazon.co.jp/gp/digital/fiona/manage?ie=UTF8&ref_=nav_youraccount_myk'
3
+ def url_for_kindle_contents
4
+ "https://www.#{ENV['AMAZON_DOMAIN']}/hz/mycd/digital-console/contentlist/booksAll/dateDsc/"
5
+ end
4
6
 
5
7
  def fetch
6
8
  go_to_kindle_management_page
@@ -10,17 +12,17 @@ module KindleManager
10
12
  puts "[ERROR] #{e}"
11
13
  puts e.backtrace
12
14
  puts
13
- puts "Retry manually -> client.adapter.load_next_kindle_list or client.session etc."
15
+ puts "Investigate the error using 'client.session', 'client.doc' etc."
14
16
  end
15
17
  end
16
18
 
17
19
  def go_to_kindle_management_page
18
20
  log "Visiting kindle management page"
19
21
  3.times do
20
- session.visit URL_FOR_KINDLE_CONTENTS
21
- wait_for_selector('.navHeader_myx')
22
- if session.first('.navHeader_myx')
23
- log "Page found '#{session.first('.navHeader_myx').text}'"
22
+ session.visit url_for_kindle_contents
23
+ wait_for_selector('#content-page-title')
24
+ if session.has_css?('#content-page-title')
25
+ log "Page found '#{session.first('#content-page-title').text}'"
24
26
  break
25
27
  else
26
28
  submit_signin_form
@@ -29,34 +31,15 @@ module KindleManager
29
31
  end
30
32
 
31
33
  def load_next_kindle_list
32
- wait_for_selector('.contentCount_myx')
33
- current_loop = 0
34
- last_page_scroll_offset = page_scroll_offset
35
- while current_loop <= max_scroll_attempts
36
- break if limit && limit < number_of_fetched_books
37
- if has_more_button?
38
- snapshot_page
39
- current_loop = 0
40
-
41
- log "Clicking 'Show More'"
42
- session.execute_script "window.scrollBy(0,-800)"
43
- show_more_button.click
44
- sleep fetching_interval
45
- else
46
- log "Loading books with scrolling #{current_loop+1}"
47
- session.execute_script "window.scrollBy(0,10000)"
48
- end
34
+ wait_for_selector('#CONTENT_COUNT')
35
+ loop do
36
+ snapshot_page
37
+ break if current_page == max_page
38
+ break if limit && limit <= number_of_fetched_books
39
+ session.first("#page-#{current_page + 1}").click
49
40
  sleep fetching_interval
50
- if last_page_scroll_offset == page_scroll_offset
51
- log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
52
- break
53
- else
54
- debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
55
- end
56
- last_page_scroll_offset = page_scroll_offset
57
- current_loop += 1
58
41
  end
59
- log "Stopped loading. You may want to resume with 'client.adapter.load_next_kindle_list'"
42
+ log "Stopped loading. You may want to continue with 'client.session', 'client.doc' etc."
60
43
  snapshot_page
61
44
  end
62
45
 
@@ -69,33 +52,26 @@ module KindleManager
69
52
  books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
70
53
  end
71
54
 
72
- def page_scroll_offset
73
- session.evaluate_script('window.pageYOffset')
74
- end
75
-
76
- def has_more_button?
77
- !!show_more_button
55
+ def current_page
56
+ doc.css('#pagination a.active').first.text.to_i
78
57
  end
79
58
 
80
- def show_more_button
81
- session.all('.contentTableShowMore_myx').find{|e| e['outerHTML'].match(/cnt_shw_more/) }
59
+ def max_page
60
+ @_max_page ||= doc.css('#pagination a').last.text.to_i
82
61
  end
83
62
 
84
63
  def number_of_fetched_books
85
- re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)(\d+)/ : /(\d+) - (\d+)/)
86
- wait_for_selector('.contentCount_myx')
87
- text = doc.css('.contentCount_myx').text
64
+ re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)から(\d+)/ : / (\d+) to (\d+) /)
65
+ wait_for_selector('#CONTENT_COUNT')
66
+ text = doc.css('#CONTENT_COUNT').text
67
+ log "Number of books: [#{text}]"
88
68
  m = text.match(re)
89
69
  return m[2].to_i if m.present?
90
70
  raise("Couldn't get the number of fetched books [#{text}]")
91
71
  end
92
72
 
93
- def loading?
94
- session.first('.myx-popover-loading-wrapper').present?
95
- end
96
-
97
73
  def snapshot_page
98
- if (text = doc.css('.contentCount_myx').try!(:text)).present?
74
+ if (text = doc.css('#CONTENT_COUNT').try!(:text)).present?
99
75
  log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
100
76
  end
101
77
  store.record_page
@@ -13,28 +13,33 @@ module KindleManager
13
13
  "#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
14
14
  end
15
15
 
16
+ def title_node
17
+ # Possible to use "div[id^='content-title-']"
18
+ @_title_node ||= @node.css('.digital_entity_title').first
19
+ end
20
+
16
21
  def asin
17
- @_asin ||= @node['name'].gsub(/\AcontentTabList_/, '')
22
+ @_asin ||= title_node.attributes['id'].value.remove('content-title-')
18
23
  end
19
24
 
20
25
  def title
21
- @_title ||= @node.css("div[id^='title']").text
26
+ @_title ||= title_node.text
22
27
  end
23
28
 
24
29
  def tag
25
- @_tag ||= @node.css("div[id^='listViewTitleTag']").css('.myx-text-bold').first.text.strip
30
+ @_tag ||= @node.css('.information_row.tags').first&.text&.strip
26
31
  end
27
32
 
28
33
  def author
29
- @_author ||= @node.css("div[id^='author']").text
34
+ @_author ||= @node.css("div[id^='content-author-']").text
30
35
  end
31
36
 
32
37
  def date
33
- @_date ||= parse_date(@node.css("div[id^='date']").text)
38
+ @_date ||= parse_date(@node.css("div[id^='content-acquired-date-']").text)
34
39
  end
35
40
 
36
41
  def collection_count
37
- @_collection_count ||= @node.css(".collectionsCount .myx-collection-count").first.text.strip.to_i
42
+ @_collection_count ||= @node.css('.dropdown_count').first&.text&.strip.to_i
38
43
  end
39
44
 
40
45
  def to_hash
@@ -48,7 +53,7 @@ module KindleManager
48
53
 
49
54
  def parse
50
55
  @_parsed ||= begin
51
- doc.css("div[id^='contentTabList_']").map{|e| BookRow.new(e, fetched_at: fetched_at) }
56
+ doc.css('#CONTENT_LIST table tbody tr').map{|e| BookRow.new(e, fetched_at: fetched_at) }
52
57
  end
53
58
  end
54
59
  end
@@ -11,7 +11,7 @@ module KindleManager
11
11
  begin
12
12
  Date.parse(date_text)
13
13
  rescue ArgumentError => e
14
- m = date_text.match(/\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
14
+ m = date_text.match(/(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
15
15
  m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
16
16
  raise("Failed to parse date [#{date_text}]") if m.nil?
17
17
  Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
@@ -1,3 +1,3 @@
1
1
  module KindleManager
2
- VERSION = "0.6.4"
2
+ VERSION = "0.8.0"
3
3
  end
@@ -9,6 +9,7 @@ require "kindle_manager/parsers/common"
9
9
  require "kindle_manager/parsers/base_parser"
10
10
  require "kindle_manager/parsers/books_parser"
11
11
  require "kindle_manager/parsers/highlights_parser"
12
+ require 'webdrivers'
12
13
 
13
14
  module KindleManager
14
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindle_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.4
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuho Yamaguchi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-27 00:00:00.000000000 Z
11
+ date: 2022-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazon_auth
@@ -16,14 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.4.0
19
+ version: 0.8.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.4.0
26
+ version: 0.8.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: webdrivers
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -42,30 +56,30 @@ dependencies:
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - "~>"
59
+ - - ">="
46
60
  - !ruby/object:Gem::Version
47
- version: '13.0'
61
+ version: '0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - "~>"
66
+ - - ">="
53
67
  - !ruby/object:Gem::Version
54
- version: '13.0'
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rspec
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - "~>"
73
+ - - ">="
60
74
  - !ruby/object:Gem::Version
61
- version: '3.0'
75
+ version: '0'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - "~>"
80
+ - - ">="
67
81
  - !ruby/object:Gem::Version
68
- version: '3.0'
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: byebug
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -87,9 +101,9 @@ executables: []
87
101
  extensions: []
88
102
  extra_rdoc_files: []
89
103
  files:
104
+ - ".circleci/config.yml"
90
105
  - ".gitignore"
91
106
  - ".rspec"
92
- - ".travis.yml"
93
107
  - Gemfile
94
108
  - LICENSE.txt
95
109
  - README.md
@@ -112,7 +126,7 @@ homepage: https://github.com/kyamaguchi/kindle_manager
112
126
  licenses:
113
127
  - MIT
114
128
  metadata: {}
115
- post_install_message:
129
+ post_install_message:
116
130
  rdoc_options: []
117
131
  require_paths:
118
132
  - lib
@@ -127,8 +141,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
127
141
  - !ruby/object:Gem::Version
128
142
  version: '0'
129
143
  requirements: []
130
- rubygems_version: 3.0.8
131
- signing_key:
144
+ rubygems_version: 3.2.33
145
+ signing_key:
132
146
  specification_version: 4
133
147
  summary: Scrape information of kindle books from amazon site
134
148
  test_files: []
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.4.5
5
- - 2.5.3
6
- - 2.6.0
7
- before_install: gem install bundler