kindle_manager 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -5
- data/kindle_manager.gemspec +2 -1
- data/lib/kindle_manager/adapters/books_adapter.rb +24 -48
- data/lib/kindle_manager/parsers/books_parser.rb +12 -7
- data/lib/kindle_manager/parsers/common.rb +1 -1
- data/lib/kindle_manager/version.rb +1 -1
- data/lib/kindle_manager.rb +1 -0
- metadata +19 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a490117c413d9e85320298a280d32da6a1c0af1f97059af1cc89affbd40fc284
|
|
4
|
+
data.tar.gz: 6b1b31d1d72eedbe4605ea6b61ae243b624e36971aba14c31389f092f9a121f8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a8d03445f3b6a517c1dffba41a214e37340a40e641936521c36905109c5977cfe84f7239f365a18dd503cc07ecc766f31e647f0f3e5666997f43e7e0123b2a74
|
|
7
|
+
data.tar.gz: 5a0fad36eeda000586c97cf343a83792f5ee98c12110fd248a24a5f9327590c01ba16b6c945e44725c7bcb281e5ccd2fb69ad2f71e9fa1b0481b1b571e2a3373
|
data/README.md
CHANGED
|
@@ -142,11 +142,6 @@ Login and password: `login: 'xxx', password: 'yyy'`
|
|
|
142
142
|
|
|
143
143
|
Output debug log: `debug: true`
|
|
144
144
|
|
|
145
|
-
## Issues
|
|
146
|
-
|
|
147
|
-
There may be problems with capybara 3.
|
|
148
|
-
Use older version with `gem 'capybara', '~> 2.18.0'` in that case.
|
|
149
|
-
|
|
150
145
|
## TODO
|
|
151
146
|
|
|
152
147
|
- Limit the number of fetching books by date
|
data/kindle_manager.gemspec
CHANGED
|
@@ -21,7 +21,8 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
22
22
|
spec.require_paths = ["lib"]
|
|
23
23
|
|
|
24
|
-
spec.add_runtime_dependency "amazon_auth", "~> 0.
|
|
24
|
+
spec.add_runtime_dependency "amazon_auth", "~> 0.8.0"
|
|
25
|
+
spec.add_runtime_dependency "webdrivers"
|
|
25
26
|
spec.add_development_dependency "bundler"
|
|
26
27
|
spec.add_development_dependency "rake"
|
|
27
28
|
spec.add_development_dependency "rspec"
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module KindleManager
|
|
2
2
|
class BooksAdapter < BaseAdapter
|
|
3
|
-
|
|
3
|
+
def url_for_kindle_contents
|
|
4
|
+
"https://www.#{ENV['AMAZON_DOMAIN']}/hz/mycd/digital-console/contentlist/booksAll/dateDsc/"
|
|
5
|
+
end
|
|
4
6
|
|
|
5
7
|
def fetch
|
|
6
8
|
go_to_kindle_management_page
|
|
@@ -10,17 +12,17 @@ module KindleManager
|
|
|
10
12
|
puts "[ERROR] #{e}"
|
|
11
13
|
puts e.backtrace
|
|
12
14
|
puts
|
|
13
|
-
puts "
|
|
15
|
+
puts "Investigate the error using 'client.session', 'client.doc' etc."
|
|
14
16
|
end
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def go_to_kindle_management_page
|
|
18
20
|
log "Visiting kindle management page"
|
|
19
21
|
3.times do
|
|
20
|
-
session.visit
|
|
21
|
-
wait_for_selector('
|
|
22
|
-
if session.has_css?('
|
|
23
|
-
log "Page found '#{session.first('
|
|
22
|
+
session.visit url_for_kindle_contents
|
|
23
|
+
wait_for_selector('#content-page-title')
|
|
24
|
+
if session.has_css?('#content-page-title')
|
|
25
|
+
log "Page found '#{session.first('#content-page-title').text}'"
|
|
24
26
|
break
|
|
25
27
|
else
|
|
26
28
|
submit_signin_form
|
|
@@ -29,34 +31,15 @@ module KindleManager
|
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
def load_next_kindle_list
|
|
32
|
-
wait_for_selector('
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
break if limit && limit
|
|
37
|
-
|
|
38
|
-
snapshot_page
|
|
39
|
-
current_loop = 0
|
|
40
|
-
|
|
41
|
-
log "Clicking 'Show More'"
|
|
42
|
-
session.execute_script "window.scrollBy(0,-800)"
|
|
43
|
-
show_more_button.click
|
|
44
|
-
sleep fetching_interval
|
|
45
|
-
else
|
|
46
|
-
log "Loading books with scrolling #{current_loop+1}"
|
|
47
|
-
session.execute_script "window.scrollBy(0,10000)"
|
|
48
|
-
end
|
|
34
|
+
wait_for_selector('#CONTENT_COUNT')
|
|
35
|
+
loop do
|
|
36
|
+
snapshot_page
|
|
37
|
+
break if current_page == max_page
|
|
38
|
+
break if limit && limit <= number_of_fetched_books
|
|
39
|
+
session.first("#page-#{current_page + 1}").click
|
|
49
40
|
sleep fetching_interval
|
|
50
|
-
if last_page_scroll_offset == page_scroll_offset
|
|
51
|
-
log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
|
|
52
|
-
break
|
|
53
|
-
else
|
|
54
|
-
debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
|
|
55
|
-
end
|
|
56
|
-
last_page_scroll_offset = page_scroll_offset
|
|
57
|
-
current_loop += 1
|
|
58
41
|
end
|
|
59
|
-
log "Stopped loading. You may want to
|
|
42
|
+
log "Stopped loading. You may want to continue with 'client.session', 'client.doc' etc."
|
|
60
43
|
snapshot_page
|
|
61
44
|
end
|
|
62
45
|
|
|
@@ -69,33 +52,26 @@ module KindleManager
|
|
|
69
52
|
books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
|
|
70
53
|
end
|
|
71
54
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
def has_more_button?
|
|
77
|
-
!!show_more_button
|
|
55
|
+
def current_page
|
|
56
|
+
doc.css('#pagination a.active').first.text.to_i
|
|
78
57
|
end
|
|
79
58
|
|
|
80
|
-
def
|
|
81
|
-
|
|
59
|
+
def max_page
|
|
60
|
+
@_max_page ||= doc.css('#pagination a').last.text.to_i
|
|
82
61
|
end
|
|
83
62
|
|
|
84
63
|
def number_of_fetched_books
|
|
85
|
-
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)
|
|
86
|
-
wait_for_selector('
|
|
87
|
-
text = doc.css('
|
|
64
|
+
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)から(\d+)/ : / (\d+) to (\d+) /)
|
|
65
|
+
wait_for_selector('#CONTENT_COUNT')
|
|
66
|
+
text = doc.css('#CONTENT_COUNT').text
|
|
67
|
+
log "Number of books: [#{text}]"
|
|
88
68
|
m = text.match(re)
|
|
89
69
|
return m[2].to_i if m.present?
|
|
90
70
|
raise("Couldn't get the number of fetched books [#{text}]")
|
|
91
71
|
end
|
|
92
72
|
|
|
93
|
-
def loading?
|
|
94
|
-
session.first('.myx-popover-loading-wrapper').present?
|
|
95
|
-
end
|
|
96
|
-
|
|
97
73
|
def snapshot_page
|
|
98
|
-
if (text = doc.css('
|
|
74
|
+
if (text = doc.css('#CONTENT_COUNT').try!(:text)).present?
|
|
99
75
|
log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
|
|
100
76
|
end
|
|
101
77
|
store.record_page
|
|
@@ -13,28 +13,33 @@ module KindleManager
|
|
|
13
13
|
"#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
+
def title_node
|
|
17
|
+
# Possible to use "div[id^='content-title-']"
|
|
18
|
+
@_title_node ||= @node.css('.digital_entity_title').first
|
|
19
|
+
end
|
|
20
|
+
|
|
16
21
|
def asin
|
|
17
|
-
@_asin ||=
|
|
22
|
+
@_asin ||= title_node.attributes['id'].value.remove('content-title-')
|
|
18
23
|
end
|
|
19
24
|
|
|
20
25
|
def title
|
|
21
|
-
@_title ||=
|
|
26
|
+
@_title ||= title_node.text
|
|
22
27
|
end
|
|
23
28
|
|
|
24
29
|
def tag
|
|
25
|
-
@_tag ||= @node.css(
|
|
30
|
+
@_tag ||= @node.css('.information_row.tags').first&.text&.strip
|
|
26
31
|
end
|
|
27
32
|
|
|
28
33
|
def author
|
|
29
|
-
@_author ||= @node.css("div[id^='author']").text
|
|
34
|
+
@_author ||= @node.css("div[id^='content-author-']").text
|
|
30
35
|
end
|
|
31
36
|
|
|
32
37
|
def date
|
|
33
|
-
@_date ||= parse_date(@node.css("div[id^='date']").text)
|
|
38
|
+
@_date ||= parse_date(@node.css("div[id^='content-acquired-date-']").text)
|
|
34
39
|
end
|
|
35
40
|
|
|
36
41
|
def collection_count
|
|
37
|
-
@_collection_count ||= @node.css(
|
|
42
|
+
@_collection_count ||= @node.css('.dropdown_count').first&.text&.strip.to_i
|
|
38
43
|
end
|
|
39
44
|
|
|
40
45
|
def to_hash
|
|
@@ -48,7 +53,7 @@ module KindleManager
|
|
|
48
53
|
|
|
49
54
|
def parse
|
|
50
55
|
@_parsed ||= begin
|
|
51
|
-
doc.css(
|
|
56
|
+
doc.css('#CONTENT_LIST table tbody tr').map{|e| BookRow.new(e, fetched_at: fetched_at) }
|
|
52
57
|
end
|
|
53
58
|
end
|
|
54
59
|
end
|
|
@@ -11,7 +11,7 @@ module KindleManager
|
|
|
11
11
|
begin
|
|
12
12
|
Date.parse(date_text)
|
|
13
13
|
rescue ArgumentError => e
|
|
14
|
-
m = date_text.match(
|
|
14
|
+
m = date_text.match(/(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
|
|
15
15
|
m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
|
|
16
16
|
raise("Failed to parse date [#{date_text}]") if m.nil?
|
|
17
17
|
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
data/lib/kindle_manager.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kindle_manager
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.0
|
|
4
|
+
version: 0.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kazuho Yamaguchi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-11-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: amazon_auth
|
|
@@ -16,14 +16,28 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.
|
|
19
|
+
version: 0.8.0
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.
|
|
26
|
+
version: 0.8.0
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: webdrivers
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: bundler
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -127,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
127
141
|
- !ruby/object:Gem::Version
|
|
128
142
|
version: '0'
|
|
129
143
|
requirements: []
|
|
130
|
-
rubygems_version: 3.
|
|
144
|
+
rubygems_version: 3.2.33
|
|
131
145
|
signing_key:
|
|
132
146
|
specification_version: 4
|
|
133
147
|
summary: Scrape information of kindle books from amazon site
|