kindle_manager 0.6.4 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +21 -0
- data/README.md +2 -7
- data/kindle_manager.gemspec +4 -3
- data/lib/kindle_manager/adapters/books_adapter.rb +24 -48
- data/lib/kindle_manager/parsers/books_parser.rb +12 -7
- data/lib/kindle_manager/parsers/common.rb +1 -1
- data/lib/kindle_manager/version.rb +1 -1
- data/lib/kindle_manager.rb +1 -0
- metadata +31 -17
- data/.travis.yml +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a490117c413d9e85320298a280d32da6a1c0af1f97059af1cc89affbd40fc284
|
4
|
+
data.tar.gz: 6b1b31d1d72eedbe4605ea6b61ae243b624e36971aba14c31389f092f9a121f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8d03445f3b6a517c1dffba41a214e37340a40e641936521c36905109c5977cfe84f7239f365a18dd503cc07ecc766f31e647f0f3e5666997f43e7e0123b2a74
|
7
|
+
data.tar.gz: 5a0fad36eeda000586c97cf343a83792f5ee98c12110fd248a24a5f9327590c01ba16b6c945e44725c7bcb281e5ccd2fb69ad2f71e9fa1b0481b1b571e2a3373
|
data/.circleci/config.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
version: 2.1
|
2
|
+
|
3
|
+
jobs:
|
4
|
+
test:
|
5
|
+
parameters:
|
6
|
+
ruby-version:
|
7
|
+
type: string
|
8
|
+
docker:
|
9
|
+
- image: cimg/ruby:<< parameters.ruby-version >>-browsers
|
10
|
+
steps:
|
11
|
+
- checkout
|
12
|
+
- run: bundle install
|
13
|
+
- run: bundle exec rspec
|
14
|
+
|
15
|
+
workflows:
|
16
|
+
all-tests:
|
17
|
+
jobs:
|
18
|
+
- test:
|
19
|
+
matrix:
|
20
|
+
parameters:
|
21
|
+
ruby-version: ["2.7.5", "3.0.3"]
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# KindleManager
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/kindle_manager.svg)](https://badge.fury.io/rb/kindle_manager)
|
4
|
-
[![
|
4
|
+
[![CircleCI](https://circleci.com/gh/kyamaguchi/kindle_manager.svg?style=svg)](https://circleci.com/gh/kyamaguchi/kindle_manager)
|
5
5
|
|
6
6
|
Scrape information of kindle books & highlights from amazon site
|
7
7
|
|
@@ -34,7 +34,7 @@ Or install it yourself as:
|
|
34
34
|
|
35
35
|
### Setup
|
36
36
|
|
37
|
-
[chromedriver](https://sites.google.com/
|
37
|
+
[chromedriver](https://sites.google.com/chromium.org/driver/) is required. Please [download chromedriver](https://chromedriver.storage.googleapis.com/index.html) and update chromedriver regularly.
|
38
38
|
|
39
39
|
Create _.env_ following the instructions of https://github.com/kyamaguchi/amazon_auth
|
40
40
|
|
@@ -142,11 +142,6 @@ Login and password: `login: 'xxx', password: 'yyy'`
|
|
142
142
|
|
143
143
|
Output debug log: `debug: true`
|
144
144
|
|
145
|
-
## Issues
|
146
|
-
|
147
|
-
There may be problems with capybara 3.
|
148
|
-
Use older version with `gem 'capybara', '~> 2.18.0'` in that case.
|
149
|
-
|
150
145
|
## TODO
|
151
146
|
|
152
147
|
- Limit the number of fetching books by date
|
data/kindle_manager.gemspec
CHANGED
@@ -21,9 +21,10 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "amazon_auth", "~> 0.
|
24
|
+
spec.add_runtime_dependency "amazon_auth", "~> 0.8.0"
|
25
|
+
spec.add_runtime_dependency "webdrivers"
|
25
26
|
spec.add_development_dependency "bundler"
|
26
|
-
spec.add_development_dependency "rake"
|
27
|
-
spec.add_development_dependency "rspec"
|
27
|
+
spec.add_development_dependency "rake"
|
28
|
+
spec.add_development_dependency "rspec"
|
28
29
|
spec.add_development_dependency "byebug"
|
29
30
|
end
|
data/lib/kindle_manager/adapters/books_adapter.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module KindleManager
|
2
2
|
class BooksAdapter < BaseAdapter
|
3
|
-
|
3
|
+
def url_for_kindle_contents
|
4
|
+
"https://www.#{ENV['AMAZON_DOMAIN']}/hz/mycd/digital-console/contentlist/booksAll/dateDsc/"
|
5
|
+
end
|
4
6
|
|
5
7
|
def fetch
|
6
8
|
go_to_kindle_management_page
|
@@ -10,17 +12,17 @@ module KindleManager
|
|
10
12
|
puts "[ERROR] #{e}"
|
11
13
|
puts e.backtrace
|
12
14
|
puts
|
13
|
-
puts "
|
15
|
+
puts "Investigate the error using 'client.session', 'client.doc' etc."
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
17
19
|
def go_to_kindle_management_page
|
18
20
|
log "Visiting kindle management page"
|
19
21
|
3.times do
|
20
|
-
session.visit
|
21
|
-
wait_for_selector('
|
22
|
-
if session.
|
23
|
-
log "Page found '#{session.first('
|
22
|
+
session.visit url_for_kindle_contents
|
23
|
+
wait_for_selector('#content-page-title')
|
24
|
+
if session.has_css?('#content-page-title')
|
25
|
+
log "Page found '#{session.first('#content-page-title').text}'"
|
24
26
|
break
|
25
27
|
else
|
26
28
|
submit_signin_form
|
@@ -29,34 +31,15 @@ module KindleManager
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def load_next_kindle_list
|
32
|
-
wait_for_selector('
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
break if limit && limit
|
37
|
-
|
38
|
-
snapshot_page
|
39
|
-
current_loop = 0
|
40
|
-
|
41
|
-
log "Clicking 'Show More'"
|
42
|
-
session.execute_script "window.scrollBy(0,-800)"
|
43
|
-
show_more_button.click
|
44
|
-
sleep fetching_interval
|
45
|
-
else
|
46
|
-
log "Loading books with scrolling #{current_loop+1}"
|
47
|
-
session.execute_script "window.scrollBy(0,10000)"
|
48
|
-
end
|
34
|
+
wait_for_selector('#CONTENT_COUNT')
|
35
|
+
loop do
|
36
|
+
snapshot_page
|
37
|
+
break if current_page == max_page
|
38
|
+
break if limit && limit <= number_of_fetched_books
|
39
|
+
session.first("#page-#{current_page + 1}").click
|
49
40
|
sleep fetching_interval
|
50
|
-
if last_page_scroll_offset == page_scroll_offset
|
51
|
-
log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
|
52
|
-
break
|
53
|
-
else
|
54
|
-
debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
|
55
|
-
end
|
56
|
-
last_page_scroll_offset = page_scroll_offset
|
57
|
-
current_loop += 1
|
58
41
|
end
|
59
|
-
log "Stopped loading. You may want to
|
42
|
+
log "Stopped loading. You may want to continue with 'client.session', 'client.doc' etc."
|
60
43
|
snapshot_page
|
61
44
|
end
|
62
45
|
|
@@ -69,33 +52,26 @@ module KindleManager
|
|
69
52
|
books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
|
70
53
|
end
|
71
54
|
|
72
|
-
def
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
def has_more_button?
|
77
|
-
!!show_more_button
|
55
|
+
def current_page
|
56
|
+
doc.css('#pagination a.active').first.text.to_i
|
78
57
|
end
|
79
58
|
|
80
|
-
def
|
81
|
-
|
59
|
+
def max_page
|
60
|
+
@_max_page ||= doc.css('#pagination a').last.text.to_i
|
82
61
|
end
|
83
62
|
|
84
63
|
def number_of_fetched_books
|
85
|
-
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)
|
86
|
-
wait_for_selector('
|
87
|
-
text = doc.css('
|
64
|
+
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)から(\d+)/ : / (\d+) to (\d+) /)
|
65
|
+
wait_for_selector('#CONTENT_COUNT')
|
66
|
+
text = doc.css('#CONTENT_COUNT').text
|
67
|
+
log "Number of books: [#{text}]"
|
88
68
|
m = text.match(re)
|
89
69
|
return m[2].to_i if m.present?
|
90
70
|
raise("Couldn't get the number of fetched books [#{text}]")
|
91
71
|
end
|
92
72
|
|
93
|
-
def loading?
|
94
|
-
session.first('.myx-popover-loading-wrapper').present?
|
95
|
-
end
|
96
|
-
|
97
73
|
def snapshot_page
|
98
|
-
if (text = doc.css('
|
74
|
+
if (text = doc.css('#CONTENT_COUNT').try!(:text)).present?
|
99
75
|
log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
|
100
76
|
end
|
101
77
|
store.record_page
|
data/lib/kindle_manager/parsers/books_parser.rb
CHANGED
@@ -13,28 +13,33 @@ module KindleManager
|
|
13
13
|
"#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
|
14
14
|
end
|
15
15
|
|
16
|
+
def title_node
|
17
|
+
# Possible to use "div[id^='content-title-']"
|
18
|
+
@_title_node ||= @node.css('.digital_entity_title').first
|
19
|
+
end
|
20
|
+
|
16
21
|
def asin
|
17
|
-
@_asin ||=
|
22
|
+
@_asin ||= title_node.attributes['id'].value.remove('content-title-')
|
18
23
|
end
|
19
24
|
|
20
25
|
def title
|
21
|
-
@_title ||=
|
26
|
+
@_title ||= title_node.text
|
22
27
|
end
|
23
28
|
|
24
29
|
def tag
|
25
|
-
@_tag ||= @node.css(
|
30
|
+
@_tag ||= @node.css('.information_row.tags').first&.text&.strip
|
26
31
|
end
|
27
32
|
|
28
33
|
def author
|
29
|
-
@_author ||= @node.css("div[id^='author']").text
|
34
|
+
@_author ||= @node.css("div[id^='content-author-']").text
|
30
35
|
end
|
31
36
|
|
32
37
|
def date
|
33
|
-
@_date ||= parse_date(@node.css("div[id^='date']").text)
|
38
|
+
@_date ||= parse_date(@node.css("div[id^='content-acquired-date-']").text)
|
34
39
|
end
|
35
40
|
|
36
41
|
def collection_count
|
37
|
-
@_collection_count ||= @node.css(
|
42
|
+
@_collection_count ||= @node.css('.dropdown_count').first&.text&.strip.to_i
|
38
43
|
end
|
39
44
|
|
40
45
|
def to_hash
|
@@ -48,7 +53,7 @@ module KindleManager
|
|
48
53
|
|
49
54
|
def parse
|
50
55
|
@_parsed ||= begin
|
51
|
-
doc.css(
|
56
|
+
doc.css('#CONTENT_LIST table tbody tr').map{|e| BookRow.new(e, fetched_at: fetched_at) }
|
52
57
|
end
|
53
58
|
end
|
54
59
|
end
|
data/lib/kindle_manager/parsers/common.rb
CHANGED
@@ -11,7 +11,7 @@ module KindleManager
|
|
11
11
|
begin
|
12
12
|
Date.parse(date_text)
|
13
13
|
rescue ArgumentError => e
|
14
|
-
m = date_text.match(
|
14
|
+
m = date_text.match(/(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
|
15
15
|
m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
|
16
16
|
raise("Failed to parse date [#{date_text}]") if m.nil?
|
17
17
|
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
data/lib/kindle_manager.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kindle_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -16,14 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.8.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.8.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: webdrivers
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -42,30 +56,30 @@ dependencies:
|
|
42
56
|
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - "
|
59
|
+
- - ">="
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
61
|
+
version: '0'
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
|
-
- - "
|
66
|
+
- - ">="
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
|
-
- - "
|
73
|
+
- - ">="
|
60
74
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
75
|
+
version: '0'
|
62
76
|
type: :development
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
66
|
-
- - "
|
80
|
+
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: byebug
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,9 +101,9 @@ executables: []
|
|
87
101
|
extensions: []
|
88
102
|
extra_rdoc_files: []
|
89
103
|
files:
|
104
|
+
- ".circleci/config.yml"
|
90
105
|
- ".gitignore"
|
91
106
|
- ".rspec"
|
92
|
-
- ".travis.yml"
|
93
107
|
- Gemfile
|
94
108
|
- LICENSE.txt
|
95
109
|
- README.md
|
@@ -112,7 +126,7 @@ homepage: https://github.com/kyamaguchi/kindle_manager
|
|
112
126
|
licenses:
|
113
127
|
- MIT
|
114
128
|
metadata: {}
|
115
|
-
post_install_message:
|
129
|
+
post_install_message:
|
116
130
|
rdoc_options: []
|
117
131
|
require_paths:
|
118
132
|
- lib
|
@@ -127,8 +141,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
141
|
- !ruby/object:Gem::Version
|
128
142
|
version: '0'
|
129
143
|
requirements: []
|
130
|
-
rubygems_version: 3.
|
131
|
-
signing_key:
|
144
|
+
rubygems_version: 3.2.33
|
145
|
+
signing_key:
|
132
146
|
specification_version: 4
|
133
147
|
summary: Scrape information of kindle books from amazon site
|
134
148
|
test_files: []
|