kindle_manager 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/README.md +0 -5
- data/kindle_manager.gemspec +1 -1
- data/lib/kindle_manager/adapters/books_adapter.rb +24 -48
- data/lib/kindle_manager/parsers/books_parser.rb +12 -7
- data/lib/kindle_manager/parsers/common.rb +1 -1
- data/lib/kindle_manager/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7115830b13fb4702b1e1cf89e6f1c06d3bac4a130994fde5c39d396b62d0372
|
4
|
+
data.tar.gz: 9689914d9059b669864c1c58b583b9594178fc8781e6b208c810f387bb0172e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a20739655225f96024977aee51ba86deb38befd7cd65a3fa5ea54ecff484b0b77d5b40936fa8c087b55ebb334986ddcb8a8664922458696705611e64ec190132
|
7
|
+
data.tar.gz: 7206fab0712baa2e7317549f12d422b3d4610bb8d7a0948406c9cc37412ca74c2b3c7a382eb2e4a148b9855ccaf8abf71165cde7f4d075caf17218e875f0f144
|
data/.circleci/config.yml
CHANGED
data/README.md
CHANGED
@@ -142,11 +142,6 @@ Login and password: `login: 'xxx', password: 'yyy'`
|
|
142
142
|
|
143
143
|
Output debug log: `debug: true`
|
144
144
|
|
145
|
-
## Issues
|
146
|
-
|
147
|
-
There may be problems with capybara 3.
|
148
|
-
Use older version with `gem 'capybara', '~> 2.18.0'` in that case.
|
149
|
-
|
150
145
|
## TODO
|
151
146
|
|
152
147
|
- Limit the number of fetching books by date
|
data/kindle_manager.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "amazon_auth", "~> 0.
|
24
|
+
spec.add_runtime_dependency "amazon_auth", "~> 0.8.0"
|
25
25
|
spec.add_development_dependency "bundler"
|
26
26
|
spec.add_development_dependency "rake"
|
27
27
|
spec.add_development_dependency "rspec"
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module KindleManager
|
2
2
|
class BooksAdapter < BaseAdapter
|
3
|
-
|
3
|
+
def url_for_kindle_contents
|
4
|
+
"https://www.#{ENV['AMAZON_DOMAIN']}/hz/mycd/digital-console/contentlist/booksAll/dateDsc/"
|
5
|
+
end
|
4
6
|
|
5
7
|
def fetch
|
6
8
|
go_to_kindle_management_page
|
@@ -10,17 +12,17 @@ module KindleManager
|
|
10
12
|
puts "[ERROR] #{e}"
|
11
13
|
puts e.backtrace
|
12
14
|
puts
|
13
|
-
puts "
|
15
|
+
puts "Investigate the error using 'client.session', 'client.doc' etc."
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
17
19
|
def go_to_kindle_management_page
|
18
20
|
log "Visiting kindle management page"
|
19
21
|
3.times do
|
20
|
-
session.visit
|
21
|
-
wait_for_selector('
|
22
|
-
if session.has_css?('
|
23
|
-
log "Page found '#{session.first('
|
22
|
+
session.visit url_for_kindle_contents
|
23
|
+
wait_for_selector('#content-page-title')
|
24
|
+
if session.has_css?('#content-page-title')
|
25
|
+
log "Page found '#{session.first('#content-page-title').text}'"
|
24
26
|
break
|
25
27
|
else
|
26
28
|
submit_signin_form
|
@@ -29,34 +31,15 @@ module KindleManager
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def load_next_kindle_list
|
32
|
-
wait_for_selector('
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
break if limit && limit
|
37
|
-
|
38
|
-
snapshot_page
|
39
|
-
current_loop = 0
|
40
|
-
|
41
|
-
log "Clicking 'Show More'"
|
42
|
-
session.execute_script "window.scrollBy(0,-800)"
|
43
|
-
show_more_button.click
|
44
|
-
sleep fetching_interval
|
45
|
-
else
|
46
|
-
log "Loading books with scrolling #{current_loop+1}"
|
47
|
-
session.execute_script "window.scrollBy(0,10000)"
|
48
|
-
end
|
34
|
+
wait_for_selector('#CONTENT_COUNT')
|
35
|
+
loop do
|
36
|
+
snapshot_page
|
37
|
+
break if current_page == max_page
|
38
|
+
break if limit && limit <= number_of_fetched_books
|
39
|
+
session.first("#page-#{current_page + 1}").click
|
49
40
|
sleep fetching_interval
|
50
|
-
if last_page_scroll_offset == page_scroll_offset
|
51
|
-
log "Stopping loading because 'page_scroll_offset' didn't change after a loop"
|
52
|
-
break
|
53
|
-
else
|
54
|
-
debug "last_page_scroll_offset:#{last_page_scroll_offset} new page_scroll_offset:#{page_scroll_offset}"
|
55
|
-
end
|
56
|
-
last_page_scroll_offset = page_scroll_offset
|
57
|
-
current_loop += 1
|
58
41
|
end
|
59
|
-
log "Stopped loading. You may want to
|
42
|
+
log "Stopped loading. You may want to continue with 'client.session', 'client.doc' etc."
|
60
43
|
snapshot_page
|
61
44
|
end
|
62
45
|
|
@@ -69,33 +52,26 @@ module KindleManager
|
|
69
52
|
books.sort_by{|b| [-b.date.to_time.to_i, -b.fetched_at.to_i] }.uniq(&:asin)
|
70
53
|
end
|
71
54
|
|
72
|
-
def
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
def has_more_button?
|
77
|
-
!!show_more_button
|
55
|
+
def current_page
|
56
|
+
doc.css('#pagination a.active').first.text.to_i
|
78
57
|
end
|
79
58
|
|
80
|
-
def
|
81
|
-
|
59
|
+
def max_page
|
60
|
+
@_max_page ||= doc.css('#pagination a').last.text.to_i
|
82
61
|
end
|
83
62
|
|
84
63
|
def number_of_fetched_books
|
85
|
-
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)
|
86
|
-
wait_for_selector('
|
87
|
-
text = doc.css('
|
64
|
+
re = (AmazonInfo.domain =~ /\.jp\z/ ? /(\d+)から(\d+)/ : / (\d+) to (\d+) /)
|
65
|
+
wait_for_selector('#CONTENT_COUNT')
|
66
|
+
text = doc.css('#CONTENT_COUNT').text
|
67
|
+
log "Number of books: [#{text}]"
|
88
68
|
m = text.match(re)
|
89
69
|
return m[2].to_i if m.present?
|
90
70
|
raise("Couldn't get the number of fetched books [#{text}]")
|
91
71
|
end
|
92
72
|
|
93
|
-
def loading?
|
94
|
-
session.first('.myx-popover-loading-wrapper').present?
|
95
|
-
end
|
96
|
-
|
97
73
|
def snapshot_page
|
98
|
-
if (text = doc.css('
|
74
|
+
if (text = doc.css('#CONTENT_COUNT').try!(:text)).present?
|
99
75
|
log "Current page [#{text.to_s.gsub(/[[:space:]]+/, ' ').strip}]"
|
100
76
|
end
|
101
77
|
store.record_page
|
@@ -13,28 +13,33 @@ module KindleManager
|
|
13
13
|
"#<#{self.class.name}:#{self.object_id} #{self.to_hash}>"
|
14
14
|
end
|
15
15
|
|
16
|
+
def title_node
|
17
|
+
# Possible to use "div[id^='content-title-']"
|
18
|
+
@_title_node ||= @node.css('.digital_entity_title').first
|
19
|
+
end
|
20
|
+
|
16
21
|
def asin
|
17
|
-
@_asin ||=
|
22
|
+
@_asin ||= title_node.attributes['id'].value.remove('content-title-')
|
18
23
|
end
|
19
24
|
|
20
25
|
def title
|
21
|
-
@_title ||=
|
26
|
+
@_title ||= title_node.text
|
22
27
|
end
|
23
28
|
|
24
29
|
def tag
|
25
|
-
@_tag ||= @node.css(
|
30
|
+
@_tag ||= @node.css('.information_row.tags').first&.text&.strip
|
26
31
|
end
|
27
32
|
|
28
33
|
def author
|
29
|
-
@_author ||= @node.css("div[id^='author']").text
|
34
|
+
@_author ||= @node.css("div[id^='content-author-']").text
|
30
35
|
end
|
31
36
|
|
32
37
|
def date
|
33
|
-
@_date ||= parse_date(@node.css("div[id^='date']").text)
|
38
|
+
@_date ||= parse_date(@node.css("div[id^='content-acquired-date-']").text)
|
34
39
|
end
|
35
40
|
|
36
41
|
def collection_count
|
37
|
-
@_collection_count ||= @node.css(
|
42
|
+
@_collection_count ||= @node.css('.dropdown_count').first&.text&.strip.to_i
|
38
43
|
end
|
39
44
|
|
40
45
|
def to_hash
|
@@ -48,7 +53,7 @@ module KindleManager
|
|
48
53
|
|
49
54
|
def parse
|
50
55
|
@_parsed ||= begin
|
51
|
-
doc.css(
|
56
|
+
doc.css('#CONTENT_LIST table tbody tr').map{|e| BookRow.new(e, fetched_at: fetched_at) }
|
52
57
|
end
|
53
58
|
end
|
54
59
|
end
|
@@ -11,7 +11,7 @@ module KindleManager
|
|
11
11
|
begin
|
12
12
|
Date.parse(date_text)
|
13
13
|
rescue ArgumentError => e
|
14
|
-
m = date_text.match(
|
14
|
+
m = date_text.match(/(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日/)
|
15
15
|
m = date_text.match(/(?<month>\d{1,2})月\D+(?<day>\d{1,2}),\D+(?<year>\d{4})/) if m.nil?
|
16
16
|
raise("Failed to parse date [#{date_text}]") if m.nil?
|
17
17
|
Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kindle_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuho Yamaguchi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazon_auth
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.8.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.8.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
127
|
- !ruby/object:Gem::Version
|
128
128
|
version: '0'
|
129
129
|
requirements: []
|
130
|
-
rubygems_version: 3.
|
130
|
+
rubygems_version: 3.4.10
|
131
131
|
signing_key:
|
132
132
|
specification_version: 4
|
133
133
|
summary: Scrape information of kindle books from amazon site
|